R Markdown

Generating the basic objects here a combination of minfi and sesame were used

load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Ext_deconv_sesame.RData")
annot<-as.data.frame(readRDS("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/EPIC.hg19.manifest.rds"))
#head(annot)

annot<-annot[rownames(annot)%in% rownames(Ext_deconv_betas),]

#filter crossreactive, problematic probes, X, Y and ch
Ext_deconv_betas2<-Ext_deconv_betas[rownames(Ext_deconv_betas)%in%rownames(annot[annot$seqnames!="chrY" & 
                                            annot$seqnames!="chrX" & 
                                            annot$probeType=="cg" &
                                            annot$MASK_general==FALSE,]),]

dim(Ext_deconv_betas2)
## [1] 746980    123
Ext_deconv_betas3<-Ext_deconv_betas2[complete.cases(Ext_deconv_betas2),]

dim(Ext_deconv_betas3)
## [1] 675992    123

Modified functions of ENmix, sesame, and IDOL are used

IDOL baseline functions were optimized using future parameters

#General QC Are sex and karyotype OK? Are the same subjects clustered together? Are the genetic ancestries corresponding to the broad self-reported ethnicity? “White” subjects are part of the broader Indo-European class. The genetic markers only allow broader categories.

library(FlowSorted.BloodExtended.EPIC)
epicData<-FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType!="MIX"]
table(epicData$CellType)
## 
##    Bas   Bmem    Bnv CD4mem  CD4nv CD8mem  CD8nv    Eos   Mono    Neu     NK 
##      6      6      4      4      5      4      5      4      5      6      4 
##   Treg 
##      3
MsetEx<-preprocessRaw(epicData)#Data has been processed through sesame

GMsetEx <- mapToGenome(MsetEx)
estSex <- getSex(GMsetEx)
GMsetEx <- addSex(GMsetEx, sex = estSex)
## Warning in .pDataAdd(object, sex): replacing the following columns in
## colData(object): predictedSex
plotSex(GMsetEx, id= estSex$predictedSex)
plotSex(GMsetEx, id= GMsetEx$Sex)

table(GMsetEx$Sex,estSex$predictedSex,exclude=NULL)
##    
##      F  M
##   F 15  0
##   M  0 41
pheno<-as.data.frame(pData(epicData))
table(pheno$karyotype)
## 
## XaXi  XaY 
##   15   41
# Define color for each of the 3 iris species
groups<-c( Bas= "#E41A1C", 
           #Bcell= "lightblue", 
           Bnv="#1B9E77", Bmem= "magenta", 
           #CD4T="darkorange", 
           CD4mem="darkblue", CD4nv="#D95F02", 
           #CD8T="pink",
           CD8nv ="#7570B3",CD8mem="maroon", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")#, WBC="gray", MIX="lightgray")
colors <- groups
colors <- colors[epicData$CellType]#if not names use as.numeric to color

# Define shapes
shapes = seq(from=12, to=25, by=1)
shapes <- shapes[as.numeric(as.factor(epicData$CellType))]

# Plot
plot(x = epicData$Age, y = epicData$mage, frame = FALSE,
     xlab = "Age (years)", ylab = "Methylation age",
     col = colors, pch = shapes)
legend("topright", legend = names(groups),
      col =  groups,
      pch = seq(from=12, to=25, by=1) )
abline(0,1)

#Select the subset

betas_sesame<-Ext_deconv_betas[, colnames(Ext_deconv_betas)%in%rownames(pheno)]
identical (colnames(betas_sesame),rownames(pheno))
## [1] TRUE
#All the data
annotation_col=data.frame(Sex= epicData$Sex, mSex= epicData$predictedSex, CellType=epicData$CellType, 
                          Age=epicData$Age, mage=epicData$Horvath_age, Ethnicity= epicData$Ethnicity_wide, 
                          methnicity=epicData$meth_ethnicity,frac_na=epicData$frac_na, purity=epicData$meth_purity,
                          row.names = colnames(epicData))

ann_colors = list(
    CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown")
) 
pheatmap(getSnpBeta(epicData), annotation_colors = ann_colors,
             #      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType)

#Filtered data
SNPcheck(betas_sesame)

probes <- names(sesameDataGet("HM450.probeInfo")$probe2chr.hg19)
snp.probes <- probes[grep("rs", probes)]
snp.probes <- intersect(snp.probes, rownames(betas_sesame))

annotation_col=data.frame(Sex= pheno$Sex, mSex= pheno$predictedSex, CellType=pheno$CellType, 
                          Age=pheno$Age, mage=pheno$Horvath_age, Ethnicity= pheno$Ethnicity_wide, 
                          methnicity=pheno$meth_ethnicity,frac_na=pheno$frac_na, purity=pheno$meth_purity,
                          row.names = rownames(pheno))

pheatmap(betas_sesame[snp.probes, ], annotation_colors = ann_colors,#annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType)

plot(x = pheno$Age, y = pheno$Horvath_age, frame = FALSE,
     xlab = "Age (years)", ylab = "Methylation age Horvath",
     col = colors, pch = shapes)
legend("topright", legend = names(groups),
      col =  groups,
      pch = seq(from=12, to=25, by=1) )
abline(0,1)

probes <- names(sesameDataGet("HM450.probeInfo")$probe2chr.hg19)
snp.probes <- probes[grep("rs", probes)]
snp.probes <- intersect(snp.probes, rownames(betas_sesame))

allsnps<-rbind(betas_sesame[snp.probes, ], Ext_deconv_SNP[,colnames(betas_sesame)])
pheatmap(allsnps, annotation_colors = ann_colors,#      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "All potential SNPs", cluster_rows = T, show_rownames = F, labels_col = epicData$CellType)

mdsPlot(allsnps, sampGroups = annotation_col$methnicity)

#SNPs organized
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")

allsnps2<-allsnps[rownames(allsnps)%in%rownames(snpall),]
snpall2<-snpall[rownames(snpall)%in%rownames(allsnps),]
snpall2<-snpall2[rownames(allsnps2),]
allsnps2[snpall2$U=="ALT",]<- 1-allsnps2[snpall2$U=="ALT",]
allsnps2<-ifelse(allsnps2<0.2, 0, 
                 ifelse(allsnps2>0.8, 1, 0.5))
allsnps3<-matrix(data=NA, nrow = nrow(allsnps2), ncol = ncol(allsnps2),
                 dimnames = list(rownames(allsnps2), colnames(allsnps2)) )
identical(rownames(allsnps3), rownames(snpall2))
## [1] TRUE
for (i in 1:length(rownames(allsnps3))){
  allsnps3[i,]<-ifelse(allsnps2[i,]==0, paste0(snpall2$REF[i],snpall2$REF[i]),
                       ifelse(allsnps2[i,]==1, paste0(snpall2$ALT[i],snpall2$ALT[i]), 
                              paste0(snpall2$REF[i],snpall2$ALT[i])))
}
annotation_col=data.frame(Sex= pheno$Sex, #mSex= pheno$predictedSex, 
                          CellType=pheno$CellType, 
                          #Age=pheno$Age, mage=pheno$Horvath_age, 
                          Ethnicity= pheno$Ethnicity_wide, 
                          SeSaMe_ethnicity=pheno$meth_ethnicity,#frac_na=pheno$frac_na, purity=pheno$meth_purity,
                          row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
ann_colors = list(
    CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown"), SeSaMe_ethnicity = c("ASIAN"="yellow", "BLACK OR AFRICAN AMERICAN"="black", "WHITE"="pink")
) 

pheatmap(allsnps2[,epicData$CellType!="MIX"], annotation_colors = ann_colors,#      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "All potential SNPs", cluster_rows = T, labels_row = snpall2$rs, labels_col = epicData$CellType[epicData$CellType!="MIX"], display_numbers = allsnps3[,epicData$CellType!="MIX"])

#SNPs organized according to genetic context
#load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")

allsnps2<-getSnpBeta(epicData)
allsnps2<-allsnps2[rownames(allsnps2)%in%rownames(snpall),]
snpall2<-snpall[rownames(snpall)%in%rownames(allsnps2),]
snpall2<-snpall2[rownames(allsnps2),]
allsnps2[snpall2$U=="ALT",]<- 1-allsnps2[snpall2$U=="ALT",]
allsnps2<-ifelse(allsnps2<0.2, 0, 
                 ifelse(allsnps2>0.8, 1, 0.5))
allsnps3<-matrix(data=NA, nrow = nrow(allsnps2), ncol = ncol(allsnps2),
                 dimnames = list(rownames(allsnps2), colnames(allsnps2)) )
identical(rownames(allsnps3), rownames(snpall2))
## [1] TRUE
for (i in 1:length(rownames(allsnps3))){
  allsnps3[i,]<-ifelse(allsnps2[i,]==0, paste0(snpall2$REF[i],snpall2$REF[i]),
                       ifelse(allsnps2[i,]==1, paste0(snpall2$ALT[i],snpall2$ALT[i]), 
                              paste0(snpall2$REF[i],snpall2$ALT[i])))
}

annotation_col=data.frame(Sex= pheno$Sex, #mSex= pheno$predictedSex, 
                          CellType=pheno$CellType, 
                          #Age=pheno$Age, mage=pheno$Horvath_age, 
                          Ethnicity= pheno$Ethnicity_wide, 
                          SeSaMe_ethnicity=pheno$meth_ethnicity,#frac_na=pheno$frac_na, purity=pheno$meth_purity,
                          row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
ann_colors = list(
    CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="yellow", "Indo-European"="pink", "Mixed"="brown"), SeSaMe_ethnicity = c("ASIAN"="yellow", "BLACK OR AFRICAN AMERICAN"="black", "WHITE"="pink")
) 

pheatmap(allsnps2, annotation_colors = ann_colors,#      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "Technical SNPs", cluster_rows = T, show_rownames = T, labels_col = epicData$CellType, display_numbers = allsnps3)

#Without filtering only reorganizing the 21 tracking to the minor allele

#SNPs organized
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/snpall.rda")

allsnps2<-allsnps
snpall2<-snpall
allsnps2[rownames(allsnps)%in%rownames(snpall[snpall$U=="ALT",]),]<- 1-allsnps2[rownames(allsnps)%in%rownames(snpall[snpall$U=="ALT",]),]
allsnps2<-ifelse(allsnps2<0.2, 0, 
                 ifelse(allsnps2>0.8, 1, 0.5))

annotation_col=data.frame(Sex= pheno$Sex,  
                          CellType=pheno$CellType, 
                          Ethnicity= pheno$Ethnicity_wide, 
                          SeSaMe_ethnicity=pheno$meth_ethnicity,
                          row.names = rownames(pheno))
dput(levels(as.factor(annotation_col$SeSaMe_ethnicity)))
## c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
annotation_col$SeSaMe_ethnicity<-ifelse(annotation_col$SeSaMe_ethnicity=="ASIAN", "Asian",
                                        ifelse(annotation_col$SeSaMe_ethnicity=="BLACK OR AFRICAN AMERICAN", "Black",
                                               "White"))
ann_colors = list(
    CellType = groups, Ethnicity = c("African-American"="black", "East-Asian"="brown", "Indo-European"="pink", "Mixed"="red"), SeSaMe_ethnicity = c("Asian"="brown", "Black"="black", "White"="pink")
) 

pheatmap(allsnps2[,epicData$CellType!="MIX"], annotation_colors = ann_colors,
         annotation_col = annotation_col, main = "All potential SNPs", 
         cluster_rows = T, show_rownames = F, 
         labels_col = epicData$CellType[epicData$CellType!="MIX"])

Are the samples distinct enough to allow separating the cell types?

#All samples masked probes Zhou and poor quality


#load(file = "Ext_lib_sesame.RData")

#Only high quality
#Selected samples based on back projection of the average
#Confirm high purity
filter1<-rownames(pheno[pheno$purity>=85,])

pheno3<-pheno[rownames(pheno)%in%filter1 ,]
table(pheno3$CellType)
## 
##    Bas   Bmem    Bnv CD4mem  CD4nv CD8mem  CD8nv    Eos   Mono    Neu     NK 
##      6      6      4      4      5      4      5      4      5      6      4 
##   Treg 
##      3
beta3<-betas_sesame[, colnames(betas_sesame)%in%rownames(pheno3)]
identical(colnames(beta3), rownames(pheno3))
## [1] TRUE
myeloid<-c( "Bas", "Eos", "Mono", "Neu" )
Tcells<-c("CD4mem","CD4nv" ,"CD8nv" ,"CD8mem","Treg")
Bcells<-c("Bmem","Bnv" )
CD8subtypes<-c( "CD8nv","CD8mem")

for (i in c("myeloid", "Tcells", "Bcells",  "CD8subtypes")){
  mdsPlot(beta3[, pheno3$CellType%in%get(i)], 
        sampGroups = pheno3$CellType[pheno3$CellType%in%get(i)],
        sampNames = pheno3$CellType[pheno3$CellType%in%get(i)], 
        legendPos = "topleft", legendNCol = 9, pal = groups[get(i)])
}

Are the control probes OK? Is the variance explained by specific phenotypes?

#plot controls

plotCtrl2(epicData,IDorder=NULL)
## Plotting  STAINING .jpg

## Plotting  EXTENSION .jpg

## Plotting  HYBRIDIZATION .jpg

## Plotting  TARGET_REMOVAL .jpg

## Plotting  BISULFITE_CONVERSION_I .jpg

## Plotting  BISULFITE_CONVERSION_II .jpg

## Plotting  SPECIFICITY_I .jpg

## Plotting  SPECIFICITY_II .jpg

## Plotting  NON-POLYMORPHIC .jpg

## Plotting  NEGATIVE .jpg

## Plotting  NORM_A .jpg

## Plotting  NORM_C .jpg

## Plotting  NORM_G .jpg

## Plotting  NORM_T .jpg

## Plotting  NORM_ACGT .jpg

betasnoob<-getBeta(epicData)
#head(pData(MsetEx))

annotation_col=data.frame(Sex= pheno$Sex, mSex= pheno$predictedSex, CellType=pheno$CellType, 
                          Age=pheno$Age, mage=pheno$Horvath_age, Ethnicity= pheno$Ethnicity_wide, 
                          methnicity=pheno$meth_ethnicity,frac_na=pheno$frac_na, purity=pheno$purity,
                          mpurity=pheno$meth_purity,
                          row.names = rownames(pheno))
#Unfiltered
pcrplot2(betasnoob[, colnames(betasnoob)%in%rownames(pheno)],annotation_col,npc=20)
## Analysis is running, please wait...!
## Top  20  principal components can explain  85.58593 % of data 
##     variation 
## Top  1  principal components can explain  32.55619 % of data 
##     variation 
## Top  2  principal components can explain  47.50366 % of data 
##     variation 
## Top  3  principal components can explain  55.30803 % of data 
##     variation 
## Top  4  principal components can explain  62.35569 % of data 
##     variation
#Filtered problematic samples

annotation_col=data.frame(Sex= pheno3$Sex, mSex= pheno3$predictedSex, CellType=pheno3$CellType, 
                          Age=pheno3$Age, mage=pheno3$Horvath_age, Ethnicity= pheno3$Ethnicity_wide, 
                          methnicity=pheno3$meth_ethnicity,frac_na=pheno3$frac_na, purity=pheno3$purity,
                          mpurity=pheno3$meth_purity,
                          row.names = rownames(pheno3))
pcrplot2(betasnoob[, colnames(betasnoob)%in%colnames(beta3)],annotation_col,npc=20)
## Analysis is running, please wait...!
## Top  20  principal components can explain  85.58593 % of data 
##     variation 
## Top  1  principal components can explain  32.55619 % of data 
##     variation 
## Top  2  principal components can explain  47.50366 % of data 
##     variation 
## Top  3  principal components can explain  55.30803 % of data 
##     variation 
## Top  4  principal components can explain  62.35569 % of data 
##     variation

Principal component regression analysis

dput(levels(as.factor(pheno3$CellType)))
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
pheno4<-pheno3[pheno3$CellType%in%c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg"),]
#Filtered problematic samples

annotation_col=data.frame(CellType=pheno4$CellType, 
                          Sex= pheno4$Sex, 
                          Age=pheno4$Age,
                          mAge=pheno4$Horvath_age,
                          Ethnicity= pheno4$Ethnicity_wide,
                          mEthnicity= pheno4$meth_ethnicity,
                          weight=pheno4$weight_kg, 
                          height=pheno4$height_cm,
                          BMI=pheno4$bmi,
                          Smoker=pheno4$smoker,
                          FACS_purity=pheno4$purity,
                          mPurity=pheno4$meth_purity,
                          row.names = rownames(pheno4))
pcrplot2(betasnoob[, colnames(betasnoob)%in%rownames(pheno4)],annotation_col,npc=20)
## Analysis is running, please wait...!

## Top  20  principal components can explain  85.58593 % of data 
##     variation 
## Top  1  principal components can explain  32.55619 % of data 
##     variation 
## Top  2  principal components can explain  47.50366 % of data 
##     variation 
## Top  3  principal components can explain  55.30803 % of data 
##     variation 
## Top  4  principal components can explain  62.35569 % of data 
##     variation

Are the library samples pure enough? USing the IDOL-6 we can examine how they are related to the six broader categories

Cell projections purity barplots

stack<-pheno4[,c("Bcell","CD4T",  "CD8T", "Neu",  "Mono", "NK" )]
stack<-round(stack*100/rowSums(stack),2)
try(boxplot(stack))
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

## Error in plot.window(xlim = xlim, ylim = ylim, log = log, yaxs = pars$yaxs) : 
##   need finite 'ylim' values
stack<-t(stack)
dat2 <- melt(stack)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
pheno4$label<-paste0(pheno4$CellType,"_", seq(along = pheno4$CellType))
fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
    geom_bar(stat="identity") +
    xlab("\nSample") +
    ylab("Cell %\n") +
    theme_bw()+
    ggtitle("Cell projection IDOL") + scale_fill_manual(values=c("#1B9E77", "#D95F02", "#7570B3",  
                                                                 "#66A61E","#E7298A", 
                                                                 "#E6AB02"))+
    theme(axis.text.x=element_text(angle = -45, hjust = 0))+
    theme(legend.title=element_blank()) +
    scale_x_discrete(labels=pheno3$label)
#theme(legend.position="none")
#detach(pheno2)
try(fetaladultspine)
## Warning: Removed 336 rows containing missing values (position_stack).

for (i in dput(levels(as.factor(pheno4$CellType)))){
    stack<-pheno4[pheno4$CellType==i,c("Bcell","CD4T",  "CD8T", "Neu",  "Mono", "NK" )]
    stack<-round(stack*100/rowSums(stack),2)
    stack<-t(stack)
    dat2 <- melt(stack)
    colors1<-c(Bcell= "#1B9E77", CD4T="#D95F02", CD8T= "#7570B3",  
               Mono= "#E7298A", Neu= "#66A61E",
               NK= "#E6AB02")
    colors2<-colors1[levels(dat2$X1)]
    fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
        geom_bar(stat="identity") +
        xlab("") +
        ylab("") + 
        geom_hline(yintercept=70, linetype="dashed", color = "red")+ 
        theme_bw()+
        ggtitle(paste("Cell projection IDOL", i)) + scale_fill_manual(values=colors2)+
        theme(axis.text.x=element_blank())+
        theme(legend.title=element_blank(), legend.position = "none") 
    assign(x = paste0(i,"_bar"),value = fetaladultspine)   
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
try(multiplot(  Mono_bar, Bas_bar, Neu_bar,Eos_bar, 
            CD4nv_bar, CD4mem_bar, Treg_bar,  NK_bar,
            CD8nv_bar, CD8mem_bar,
            Bnv_bar, Bmem_bar, 
            cols=4))
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 36 rows containing missing values (position_stack).

## Warning: Removed 36 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 18 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 30 rows containing missing values (position_stack).
## Warning: Removed 24 rows containing missing values (position_stack).

## Warning: Removed 24 rows containing missing values (position_stack).
## Warning: Removed 36 rows containing missing values (position_stack).

Examining some top genes in the IDOL-6 and how the most variable probes cluster the cell types

#beta3

table(pheno3$CellType)
## 
##    Bas   Bmem    Bnv CD4mem  CD4nv CD8mem  CD8nv    Eos   Mono    Neu     NK 
##      6      6      4      4      5      4      5      4      5      6      4 
##   Treg 
##      3
identical(colnames(beta3), rownames(pheno3))
## [1] TRUE
beta3a=Ext_deconv_betas3[, colnames(beta3)]
colnames(beta3a)<-paste0(pheno3$CellType,seq(1:length(colnames(beta3a))))

#Top genes in the IDOL-6 library
visualizeGene("CD8A",beta3a,platform = "EPIC",refversion = "hg38")#, cluster.samples = TRUE)

visualizeGene("BLK",beta3a,platform = "EPIC",refversion = "hg38")

visualizeGene("RPTOR",beta3a,platform = "EPIC",refversion = "hg38")

visualizeGene("CLASP1",beta3a,platform = "EPIC",refversion = "hg38")

visualizeGene("NFIA",beta3a,platform = "EPIC",refversion = "hg38")

visualizeGene("SLFN5",beta3a,platform = "EPIC",refversion = "hg38")

groups<-c( Bas= "#E41A1C", Eos="#3E8E93",  Neu="#66A61E", Mono="#E7298A",
           Bnv="#1B9E77", Bmem= "magenta", 
           CD4nv="#D95F02", CD4mem="darkblue", Treg="black",
           CD8nv ="#7570B3",CD8mem="maroon", NK="#E6AB02",MIX="white")

ann_colors = list(
    CellType = groups
) 
annotation_col=data.frame(CellType=pheno3$CellType, row.names = colnames(beta3))

pheatmap::pheatmap(getBeta(FlowSorted.BloodExtended.EPIC)[IDOLOptimizedCpGs,], color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), 
                   annotation_colors = ann_colors,
                   #      annotation_row = annotation_row, 
                   annotation_col = annotation_col, 
                   main = "IDOL six unfiltered",show_rownames = FALSE,
                   cluster_cols = T, cluster_rows = T, labels_col = pheno3$CellType)

totalna<-rowSums(is.na(beta3[IDOLOptimizedCpGs,]))




annotation_col=data.frame(CellType=pheno3$CellType, row.names = rownames(pheno3))

pheatmap::pheatmap(beta3a[rownames(beta3a)%in%IDOLOptimizedCpGs,], color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), 
                   annotation_colors = ann_colors,
                   #      annotation_row = annotation_row, 
                   annotation_col = annotation_col, 
                   main = "IDOL six unfiltered",show_rownames = FALSE,
                   cluster_cols = T, cluster_rows = T,  labels_col = pheno3$CellType)

mdsPlot(beta3, sampNames = pheno3$CellType, sampGroups = pheno3$CellType, pal = groups[levels(as.factor(pheno3$CellType))],legendNCol = 8, legendPos = "topleft")

#tmp3<-getBeta(Ext_deconv_rgset)[,Ext_deconv_pheno$CellType=="WBC"]
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Raw_data_mix.RData")
tmp3<-getBeta(MsetEx)[,Ext_deconv_pheno$CellType=="WBC"]
tmppheno<-Ext_deconv_pheno[Ext_deconv_pheno$CellType=="WBC",]
stack<-as.data.frame(tmppheno[tmppheno$CellType=="WBC",c("Basophil","Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )])
sumage<-rowSums(stack)
stack<-round(stack*100/sumage,1)
stack$Bas<-stack$Basophil


data("IDOLOptimizedCpGs.compTable")

stack2<-stack
stack2$CD4T<-stack2$CD4T+stack2$Treg
stack2$Gran<-stack2$Neu+stack2$Eos+stack2$Bas

cellcountsbase<-projectCellType_CP(tmp3[rownames(IDOLOptimizedCpGs.compTable),], as.matrix(IDOLOptimizedCpGs.compTable), lessThanOne = TRUE)
cellcountsbase<-round(100*cellcountsbase,2)

head(stack)
##                     Basophil Bcell CD4T CD8T Eos  Neu Mono  NK Treg Bas
## 202163530006_R02C01      0.1   1.9 19.3  6.6 5.9 56.8  6.5 2.1  0.8 0.1
## 202163530006_R03C01      0.3   5.3 20.2 11.0 3.1 48.6  5.8 5.3  0.4 0.3
## 202163530006_R04C01      0.1   4.4 12.3  6.0 1.0 66.9  6.2 2.8  0.2 0.1
## 202163530006_R06C01      0.0   7.2 26.5 13.4 6.2 37.3  6.0 2.7  0.7 0.0
## 202163530006_R07C01      1.1   4.1 11.4  4.9 2.8 66.5  6.2 2.7  0.3 1.1
## 202163530006_R08C01      0.9   6.2 16.9 16.0 5.8 39.3  9.1 5.0  0.8 0.9
cellcountsval<-projectCellType_CP(tmp3[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem


cellTypes<-c("Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK")
cellcolors<-c(  Bcell="#1B9E77", 
                   CD4T="#D95F02", CD8T ="#7570B3",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02",  
           Treg="black")
truevalues<-stack2
cellcount_EPICIDOL<-as.data.frame(cellcountsbase)
cellcount_EPICIDOL$Gran<-cellcount_EPICIDOL$Neu
K = length(cellTypes)  # number of cell types


cellTypes<-c("Bas", "Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", 
                   CD4T="#D95F02", CD8T ="#7570B3",
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsval

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)
    rangexy = range(c(pred,obs))

}
## Warning in cor(obs, pred, method = "pearson"): the standard deviation is zero
cellTypes<-c("Bas", "Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")

truevalues<-stack
cellcount_EPICIDOL<-cellcountsval


# Scatter Pieplots
stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(dat2$X2),]
dat2<-dat2[order(dat2$X1),]

stack3<-t(cellcountsval[,colnames(cellcountsval)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c("Bas", "Eos","Neu",  "Mono", "NK", "Treg" ))
  dat2[dat2$X1==i,i]<-cellcountsval[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem")]<-cellcountsval[,c("CD4nv","CD4mem")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcountsval[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcountsval[,c("Bnv","Bmem")]


dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval1<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Cell projection extended versus FACS and CBC data") + scale_fill_manual(values=groups)+
        #theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval1

#counts
cell_counts<-read.csv("FCM_counts.csv", row.names = 1, header = TRUE)


cellTypes<-c("Bas", "Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")
identical(rownames(stack), rownames(cell_counts))
## [1] TRUE
truevalues0<-stack
truevalues<-matrix(NA,nrow=nrow(stack), ncol = ncol(stack), dimnames = list(rownames(stack), colnames(stack)))
for (i in cellTypes){
  truevalues[,i]<-cell_counts[, paste0(i,"_counts")]
}
cellcount_EPICIDOL<-(cellcountsval*cell_counts$WBC_counts)/100

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), 
                    RMSE= rep(NA, length(cellTypes)),wRMSE =rep(NA, length(cellTypes)),
                    normal_count =rep(NA, length(cellTypes)),row.names=cellTypes)

normal_count<-c( Bas= "10-80", Bcell="70-530", 
                   CD4T="300-1500", CD8T ="140-820", 
           Eos="30-300",
           Neu="2k-6k", Mono="200-900",NK="80-430", 
           Treg="7-52*")

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    wgt2 = as.numeric(truevalues0[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    wRMSE = MetricsWeighted::rmse(obs,pred, w= wgt2)
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,0)
    matrixr$wRMSE[k]<-round(wRMSE,0)
    matrixr$normal_count[k]<-normal_count[cellTypes[k]]
    rangexy = range(c(pred,obs))

}
## Warning in cor(obs, pred, method = "pearson"): the standard deviation is zero
cellTypes<-c("Bas", "Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",  
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL


stack2<-t(truevalues[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(dat2$X2),]
dat2<-dat2[order(dat2$X1),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8nv = "#7570B3", CD8mem = "maroon", 
NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c("Bas", "Eos","Neu",  "Mono", "NK", "Treg" ))
  dat2[dat2$X1==i,i]<-cellcountsval[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem")]<-cellcountsval[,c("CD4nv","CD4mem")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcountsval[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcountsval[,c("Bnv","Bmem")]


dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval1a<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=80), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed counts") +#xlab("\nSample") +
        ylab("Predicted counts") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Cell projection extended versus FACS and CBC data") + scale_fill_manual(values=groups)+
        #theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=2500, xmax=3500, ymin=0, ymax=1000)
        

plotval1a

CB artificial mixtures Preprocessing was te same minfi+sesame

#pheno3<-as.data.frame(pData(epicData))
#load(file = "Cellcountsprojections.RData")
#attach(pheno3)
#Only Cord blood
#pheno4<-pheno4[1:86,]
coefEsts = FlowSorted.BloodExtended.EPIC.compTable
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", 
  Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", 
#CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
#CD8T = "darkgray", 
CD8nv = "#7570B3",  CD8mem = "maroon",# 
#nRBC = "#7E6E85", 
NK = "#E6AB02")#, CB_Mix = "white")
ann_colors = list(
    CellType = groups#[names(groups)%in%colnames(coefEsts)]
) 

try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
             #      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "DMRs EPIC filtered cells", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))

annotation_col=data.frame(CellType=CB_deconv_pheno$CellType, row.names = rownames(CB_deconv_pheno))

groups<-c(#Bas = "#E41A1C", Eos = "#3E8E93", 
  Neu = "#66A61E", Mono = "#E7298A", 
Bcell = "gold", 
#Bnv = "#1B9E77", Bmem = "magenta", 
CD4T = "lightgray", 
#CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8T = "darkgray", 
#CD8nv = "#7570B3",  # 
nRBC = "#7E6E85", 
NK = "#E6AB02", CB_Mix = "white")
ann_colors = list(
    CellType = groups#[names(groups)%in%colnames(coefEsts)]
) 

referencePd2<-CB_deconv_pheno
#referenceBetas2<-CB_deconv_betas3[rownames(CB_deconv_betas3), ]
referenceBetas2<-getBeta(CordbloodEPICset)#[rownames(CB_deconv_betas3), ]

identical(colnames(referenceBetas2), rownames(referencePd2))
## [1] TRUE
ann_colors = list(
    CellType = groups
) 

try(pheatmap(referenceBetas2[rownames(referenceBetas2)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
             #      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "DMRs EPIC CB filtered cells  ", cluster_rows = T, show_rownames = F, labels_col = referencePd2$CellType))

#All together
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", 
  Neu = "#66A61E", Mono = "#E7298A", 
Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8T = "darkgray", 
CD8nv = "#7570B3",  CD8mem = "maroon",# 
nRBC = "#7E6E85", 
NK = "#E6AB02")#, CB_Mix = "white", MIX="white")
ann_colors = list(
    CellType = groups#[names(groups)%in%colnames(coefEsts)]
) 

referencePd2<-CB_deconv_pheno
referenceBetas3<-cbind(getBeta(CB_deconv_rgset)[rownames(coefEsts),
                                                CB_deconv_rgset$CellType!="CB_Mix"],
                       getBeta(FlowSorted.BloodExtended.EPIC)[rownames(coefEsts),
                                                              FlowSorted.BloodExtended.EPIC$CellType!="MIX"])
# referenceBetas3<-cbind(getBeta(CordbloodEPICset)[rownames(coefEsts),
#                                                 CordbloodEPICset$CellType!="CB_Mix"],
#                        getBeta(FlowSorted.BloodExtended.EPIC2)[rownames(coefEsts),
#                                                               FlowSorted.BloodExtended.EPIC2$CellType!="MIX"])
identical(rownames(referenceBetas3), rownames(coefEsts))
## [1] TRUE
annotation_col=data.frame(CellType=c(CB_deconv_pheno$CellType[CB_deconv_rgset$CellType!="CB_Mix"],
                                     FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType!="MIX"]), 
                          Source=c(rep("Cord Blood", length(CB_deconv_pheno$CellType[CB_deconv_rgset$CellType!="CB_Mix"])),
                                   rep("Adult Blood",
                                       length(FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType!="MIX"]))),
                          row.names = c(rownames(CB_deconv_pheno[CB_deconv_rgset$CellType!="CB_Mix",]),
                                        colnames(FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType!="MIX"])))


ann_colors = list(
    CellType = groups
) 

try(pheatmap(referenceBetas3,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
             #      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "IDOL EPIC-Ext purified cord blood and adult cells", cluster_rows = T, show_rownames = F, labels_col = annotation_col$CellType))

#####






# cellcountsCB<-FlowSorted.Blood.EPIC:::projectCellType(CB_deconv_betas[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)
cellcountsCB<-FlowSorted.Blood.EPIC:::projectCellType(getBeta(CordbloodEPICset)[rownames(coefEsts),], coefEsts, lessThanOne = FALSE)
sumage<-rowSums(cellcountsCB)
cellcountsCB2<-round(cellcountsCB*100/sumage,1)
#cellcountsCB2
phenoCB<-cbind.data.frame(cellcountsCB2, CellType=referencePd2$CellType)
#write.csv(phenoCB,file="phenoCB.csv")
#Add the Exclude
stack<-cellcountsCB2
#stack<-round(stack*100/rowSums(stack),2)
boxplot(stack)

stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
phenoCB$label<-paste0(phenoCB$CellType,"_", seq(along = phenoCB$CellType))



for (i in dput(levels(as.factor(phenoCB$CellType)))){
    stack<-phenoCB[phenoCB$CellType==i,1:12]
    stack<-round(stack*100/rowSums(stack),2)
    stack<-t(stack)
    dat2 <- melt(stack)#, id.vars = "Sample")
    #dat2$X1 <- relevel(dat2$X1, ref=i)
    #levels(dat2$X1)
    #dat2$X1 <-fct_rev(dat2$X1)
    #levels(dat2$X1)
    colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#nRBC = "#7E6E85", 
NK = "#E6AB02")#, CB_Mix = "white")
    colors2<-colors1[levels(dat2$X1)]
    fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
        geom_bar(stat="identity") +
        xlab("") +#xlab("\nSample") +
        ylab("") +#ylab("Cell %\n") + 
        geom_hline(yintercept=70, linetype="dashed", color = "red")+ 
        #guides(fill=T) +
        theme_bw()+
        ggtitle(paste("Cell projection IDOL", i)) + scale_fill_manual(values=colors2)+
        theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
        theme(legend.title=element_blank(), legend.position = "none") 
    #theme(legend.position="none")
    #detach(pheno2)
    assign(x = paste0(i,"_bar"),value = fetaladultspine)   
}
## c("Bcell", "CB_Mix", "CD4T", "CD8T", "Mono", "Neu", "NK", "nRBC"
## )
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#multiplot(  WBC_bar, WBC_FACS_bar, cols=2)
multiplot(  Mono_bar, #Basophil_bar, 
            Neu_bar,#Eos_bar, 
            nRBC_bar, CB_Mix_bar,
            Bcell_bar,CD4T_bar, 
            #CD4nv_bar, CD4mem_bar, Treg_bar,  
            CD8T_bar,NK_bar,
            #CD8T_naive_bar, CD8T_EM_bar,CD8T_CM_bar, NKT_bar,
            #Bnv_bar, Bmem_bar, 
             cols=4)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

#CB mixture adjusted

library(FlowSorted.CordBloodCombined.450k)
## snapshotDate(): 2021-05-18
library(ExperimentHub)
hub <- ExperimentHub()
## snapshotDate(): 2021-05-18
myfiles <- query(hub, "FlowSorted.CordBloodCombined.450k")
FlowSorted.CordBloodCombined.450k <- myfiles[[1]]
## see ?FlowSorted.CordBloodCombined.450k and browseVignettes('FlowSorted.CordBloodCombined.450k') for documentation
## loading from cache
FlowSorted.CordBloodCombined.450k
## class: RGChannelSet 
## dim: 575719 289 
## metadata(0):
## assays(2): Green Red
## rownames(575719): 10600322 10600328 ... 74810485 74810492
## rowData names(0):
## colnames(289): 3999984027_R02C01 3999984027_R06C01 ...
##   200705360098_R08C01 200705360110_R03C01
## colData names(13): Sample_Name Subject.ID ... CellType_original
##   Reclassified
## Annotation
##   array: IlluminaHumanMethylation450k
##   annotation: ilmn12.hg19
coefEsts = FlowSorted.BloodExtended.450klegacy.compTable

cellcountsCBref<-FlowSorted.Blood.EPIC:::projectCellType(getBeta(FlowSorted.CordBloodCombined.450k)[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)
## Loading required package: IlluminaHumanMethylation450kmanifest
sumage<-rowSums(cellcountsCBref)
cellcountsCBref<-round(cellcountsCBref*100/sumage,1)
#cellcountsCB2
phenoCBref<-cbind.data.frame(cellcountsCBref, CellType=FlowSorted.CordBloodCombined.450k$CellType)
#write.csv(phenoCB,file="phenoCB.csv")
#Add the Exclude
stack<-cellcountsCBref
#stack<-round(stack*100/rowSums(stack),2)
boxplot(stack)

stack<-t(stack)
dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
phenoCBref$label<-paste0(phenoCBref$CellType,"_", seq(along = phenoCBref$CellType))



for (i in dput(levels(as.factor(phenoCBref$CellType)))){
    stack<-phenoCBref[phenoCBref$CellType==i,1:12]
    stack<-round(stack*100/rowSums(stack),2)
    stack<-t(stack)
    dat2 <- melt(stack)#, id.vars = "Sample")
    #dat2$X1 <- relevel(dat2$X1, ref=i)
    #levels(dat2$X1)
    #dat2$X1 <-fct_rev(dat2$X1)
    #levels(dat2$X1)
    colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#nRBC = "#7E6E85", 
NK = "#E6AB02")#, CB_Mix = "white")
    colors2<-colors1[levels(dat2$X1)]
    fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
        geom_bar(stat="identity") +
        xlab("") +#xlab("\nSample") +
        ylab("") +#ylab("Cell %\n") + 
        geom_hline(yintercept=70, linetype="dashed", color = "red")+ 
        #guides(fill=T) +
        theme_bw()+
        ggtitle(paste("Cell projection IDOL \n ref UCB", i)) + scale_fill_manual(values=colors2)+
        theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
        theme(legend.title=element_blank(), legend.position = "none") 
    #theme(legend.position="none")
    #detach(pheno2)
    assign(x = paste0(i,"_bar"),value = fetaladultspine)   
}
## c("Bcell", "CD4T", "CD8T", "Gran", "Mono", "NK", "nRBC", "WBC"
## )
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#multiplot(  WBC_bar, WBC_FACS_bar, cols=2)
multiplot(  #Neu_bar,
            Gran_bar, Mono_bar, #Basophil_bar, 
            #Eos_bar, 
            nRBC_bar, #CB_Mix_bar, 
            WBC_bar,
            Bcell_bar,CD4T_bar, 
            #CD4nv_bar, CD4mem_bar, Treg_bar,  
            CD8T_bar,NK_bar,
            #CD8T_naive_bar, CD8T_EM_bar,CD8T_CM_bar, NKT_bar,
            #Bnv_bar, Bmem_bar, 
             cols=3)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/UCSF_sesame.RData")
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/Unfiltered.rda")
UCSF_set<-preprocessNoob(epicData)
colnames(UCSF_set)<-colnames(UCSF_betas)
# cellcountsval3<-projectCellType_CP(getBeta(UCSF_sesame)[rownames(matFDR),], as.matrix(matFDR), lessThanOne = TRUE)
# cellcountsval3<-round(cellcountsval3*100/rowSums(cellcountsval3),2)
# boxplot(cellcountsval3)
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM", 
              "CD8mem", 
"CD8nv", "Eos", #"MIX", 
"Mono", "Neu", "NK", #"NKT", 
"Treg")#,"WBC")
 Tcellmatrix <- read.metharray.sheet(base = "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF", pattern = "phenotype_extended2.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/phenotype_extended2.csv"
 rownames(Tcellmatrix)<-Tcellmatrix$array_pos
#dput(colnames(sheet2))
#Tcellmatrix<-UCSF_pheno
#  c("Sample_Name", "Sample_Well", "Sample_Plate", "Sample_Group", 
# "Pool_ID", "array_pos", "Sampling.ID", "Idno", "Histological.Group", 
# "Time.point", "White_Cell_count", "Tcell_count", "CD4T_count_tube1", 
# "DP_count", "CD8T_count", "DN_count", "Tcell", "CD4T1", "DP", 
# "CD8T1", "DN", "CD4T_count_tube2", "CD4Tnv_count", "CD4T_RARO_DN_count", 
# "CD4Tmem_count", "CD4T_RARO_DN_count.1", "CD4T", "CD4Tnv", "CD4T_RARO_DN", 
# "CD4Tmem", "CD4T_RARO_DN.1", "CD8T_count_tube2", "CD8Tnv_count_tube2", 
# "CD8T_RARO_DN_count", "CD8Tmem_count", "CD8T_RARO_DP_count", 
# "CD8T", "CD8T_naive", "CD8T_RARO_DN", "CD8T_mem", "CD8T_RARO_DP", 
# "grade", "dxgroup", "timet", "Array", "Slide", "Basename")]

#head(estimatesbloodref)
#estimatesbloodref1<- round(estimatesbloodref$counts*100,2)
#head(estimatesbloodref1)
head(Tcellmatrix)
##                     Sample_Name Sample_Well Sample_Plate Sample_Group Pool_ID
## 203127070004_R01C01     1408302        <NA>         <NA>            1    <NA>
## 203127070004_R02C01     1408702        <NA>         <NA>            2    <NA>
## 203127070004_R03C01     1454202        <NA>         <NA>            3    <NA>
## 203127070004_R04C01     1406801        <NA>         <NA>            4    <NA>
## 203127070004_R05C01     1454001        <NA>         <NA>            5    <NA>
## 203127070004_R06C01     1401304        <NA>         <NA>            6    <NA>
##                               array_pos Sampling.ID  Idno Histological.Group
## 203127070004_R01C01 203127070004_R01C01    14083-02 14083          1 new gbm
## 203127070004_R02C01 203127070004_R02C01    14087-02 14087          2 new lgg
## 203127070004_R03C01 203127070004_R03C01    14542-02 14542          1 new gbm
## 203127070004_R04C01 203127070004_R04C01    14068-01 14068          1 new gbm
## 203127070004_R05C01 203127070004_R05C01    14540-01 14540 4 rec lgg, now gbm
## 203127070004_R06C01 203127070004_R06C01    14013-04 14013          2 new lgg
##                                     Time.point White_Cell_count Tcell_count
## 203127070004_R01C01     2 postsurg/prechemorad            10400        1195
## 203127070004_R02C01     8 postsurg/no chemorad             7490        1186
## 203127070004_R03C01     2 postsurg/prechemorad             5360        1173
## 203127070004_R04C01                 1 pre surg            13800         755
## 203127070004_R05C01                 1 pre surg             7490        1133
## 203127070004_R06C01 3 postchemorad/pre adj tmz             3169         584
##                     CD4T_count_tube1 DP_count CD8T_count DN_count Tcell  CD4T
## 203127070004_R01C01              770       59        353       14 11.49  7.40
## 203127070004_R02C01              619        7        467       94 15.83  8.26
## 203127070004_R03C01              767        7        369       30 21.88 14.31
## 203127070004_R04C01              420       12        299       23  5.47  3.04
## 203127070004_R05C01              787        9        302       35 15.13 10.51
## 203127070004_R06C01              401        7        155       21 18.43 12.65
##                       DP CD8T   DN CD4nv_old CD4mem_old CD8T_naive_old
## 203127070004_R01C01 0.57 3.39 0.13      0.49       5.45           1.82
## 203127070004_R02C01 0.09 6.23 1.26      2.55       4.20           2.61
## 203127070004_R03C01 0.13 6.88 0.56      5.17       6.60           5.65
## 203127070004_R04C01 0.09 2.17 0.17        NA         NA             NA
## 203127070004_R05C01 0.12 4.03 0.47      3.23       5.32           2.20
## 203127070004_R06C01 0.22 4.89 0.66      1.87       8.09           1.94
##                     CD8Tmem_old     CD4nv    CD4mem CD8T_naive   CD8Tmem
## 203127070004_R01C01        1.13 0.6129293  6.816970   2.079855 1.2937681
## 203127070004_R02C01        2.14 3.1063248  5.118376   3.404372 2.7915847
## 203127070004_R03C01        0.52 6.3054617  8.042296   6.296242 0.5837576
## 203127070004_R04C01          NA        NA        NA         NA        NA
## 203127070004_R05C01        0.98 3.9718023  6.538198   2.762500 1.2350000
## 203127070004_R06C01        2.06 2.3771812 10.294044   2.380658 2.5307895
##                     CD4T_count_tube2 CD4Tnv_count CD4T_RARO_DN_count
## 203127070004_R01C01              620           41                118
## 203127070004_R02C01              285           88                 50
## 203127070004_R03C01              462          167                 79
## 203127070004_R04C01               NA           NA                 NA
## 203127070004_R05C01              423          130                 75
## 203127070004_R06C01              759          112                147
##                     CD4Tmem_count CD4T_RARO_DN_count.1 CD4T2 CD4Tnv
## 203127070004_R01C01           456                    7  5.96   0.39
## 203127070004_R02C01           145                    1  3.81   1.17
## 203127070004_R03C01           213                    4  8.62   3.12
## 203127070004_R04C01            NA                   NA    NA     NA
## 203127070004_R05C01           214                    4  5.65   1.74
## 203127070004_R06C01           485                   16 23.95   3.53
##                     CD4T_RARO_DN CD4Tmem CD4T_RARO_DN.1 CD8T_count_tube2
## 203127070004_R01C01         1.13    4.38           0.07              237
## 203127070004_R02C01         0.67    1.94           0.01              239
## 203127070004_R03C01         1.47    3.97           0.07              184
## 203127070004_R04C01           NA      NA             NA               NA
## 203127070004_R05C01         1.00    2.86           0.05              156
## 203127070004_R06C01         4.64   15.30           0.50              280
##                     CD8Tnv_count_tube2 CD8T_RARO_DN_count CD8Tmem_count
## 203127070004_R01C01                127                 29            79
## 203127070004_R02C01                100                 55            82
## 203127070004_R03C01                151                 19            14
## 203127070004_R04C01                 NA                 NA            NA
## 203127070004_R05C01                 85                 31            38
## 203127070004_R06C01                111                 49           118
##                     CD8T_RARO_DP_count CD8T2 CD8T_naive2 CD8T_RARO_DN CD8T_mem2
## 203127070004_R01C01                  1  2.28        1.22         0.28      0.76
## 203127070004_R02C01                  1  3.19        1.34         0.73      1.09
## 203127070004_R03C01                  0  3.43        2.82         0.35      0.26
## 203127070004_R04C01                 NA    NA          NA           NA        NA
## 203127070004_R05C01                  1  2.08        1.13         0.41      0.51
## 203127070004_R06C01                  3  8.84        3.50         1.55      3.72
##                     CD8T_RARO_DP grade            dxgroup
## 203127070004_R01C01         0.01     4          1 new gbm
## 203127070004_R02C01         0.01     2          2 new lgg
## 203127070004_R03C01         0.00     4          1 new gbm
## 203127070004_R04C01           NA     4          1 new gbm
## 203127070004_R05C01         0.01     4 4 rec lgg, now gbm
## 203127070004_R06C01         0.09     2          2 new lgg
##                                          timet  Array        Slide
## 203127070004_R01C01     2 postsurg/prechemorad R01C01 203127070004
## 203127070004_R02C01     8 postsurg/no chemorad R02C01 203127070004
## 203127070004_R03C01     2 postsurg/prechemorad R03C01 203127070004
## 203127070004_R04C01                 1 pre surg R04C01 203127070004
## 203127070004_R05C01                 1 pre surg R05C01 203127070004
## 203127070004_R06C01 3 postchemorad/pre adj tmz R06C01 203127070004
##                                                                                                                                                                                                  Basename
## 203127070004_R01C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R01C01
## 203127070004_R02C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R02C01
## 203127070004_R03C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R03C01
## 203127070004_R04C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R04C01
## 203127070004_R05C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R05C01
## 203127070004_R06C01 I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/78 IPS samples with FCM Data_4-23-20/78 EPIC FCM samples_for TCell analysis/203127070004/203127070004_R06C01
rmse <- function(error)
{
    sqrt(mean(error^2))
}
Tcellmatrix$CD8mem<-Tcellmatrix$CD8Tmem
Tcellmatrix$CD8nv<-Tcellmatrix$CD8T_naive
par(mfrow=c(2,3))
cellTypes<-c("CD4T", "CD4nv", "CD4mem",  "CD8T", "CD8mem","CD8nv")
celltypes<-cellTypes
colorcelltype<-c(  CD4T="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3",CD8mem="maroon", CD8nv="#705C83")#, Basophil= "#E41A1C",
           # Eos="#3E8E93",
           # Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           # Treg="black")
groups<-colorcelltype
truevalues<-Tcellmatrix[,c("CD4T", "CD4nv", "CD4mem",  "CD8T", "CD8mem","CD8nv", "Time.point", "Histological.Group")]
truevalues<-truevalues[complete.cases(truevalues),]
#The new estimates are extrapolated from the RARO experiment using the info from the Tcell experiment


# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable#IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]
#tmp<-getBeta(UCSF_rgset)
tmp<-getBeta(UCSF_set)
tmp<-tmp[, rownames(truevalues)]
# 3c: predict cell composition

predictedCellFraction = round(projectWBCnew(tmp[rownames(coefEsts),], coefEsts)*100,1)
predictedCellFraction<-as.data.frame(predictedCellFraction)
predictedCellFraction$CD4Tmem2<-predictedCellFraction$CD4mem
predictedCellFraction$CD4T<-rowSums(predictedCellFraction[,c("CD4mem", "CD4nv", "Treg")])
predictedCellFraction$CD4mem<-rowSums(predictedCellFraction[,c("CD4mem", "Treg")])
predictedCellFraction$CD8T<-rowSums(predictedCellFraction[,c("CD8mem", #"CD8T_CM",
                                               "CD8nv")])
#predictedCellFraction$CD8T_mem<-predictedCellFraction$CD8T_EM#rowSums(cellcountsval3[,c("CD8T_EM", "CD8T_CM")])

head(predictedCellFraction)
##                     Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono  Neu  NK
## 203127070004_R01C01 0.0  0.5 1.7    3.6   0.0   10.8   0.0 0.0  9.3 70.2 1.1
## 203127070004_R02C01 0.0  0.8 0.5    5.6   3.4    6.8   2.2 0.9  4.1 69.2 4.1
## 203127070004_R03C01 0.8  0.0 0.8    5.3   5.7    0.0   2.6 2.1  6.7 70.8 3.2
## 203127070004_R05C01 0.0  0.7 2.6    8.0   4.8    2.9   2.2 0.2  6.7 66.8 2.6
## 203127070004_R06C01 0.0  0.0 0.0    8.7   1.6    2.0   1.0 3.2  7.5 69.1 4.6
## 203127070004_R07C01 0.0  0.0 0.0    0.0   0.0    0.0   0.0 0.0  0.7 96.7 0.0
##                     Treg CD4Tmem2 CD4T CD8T
## 203127070004_R01C01    0      3.6  3.6 10.8
## 203127070004_R02C01    0      5.6  9.0  9.0
## 203127070004_R03C01    0      5.3 11.0  2.6
## 203127070004_R05C01    0      8.0 12.8  5.1
## 203127070004_R06C01    0      8.7 10.3  3.0
## 203127070004_R07C01    0      0.0  0.0  0.0
boxplot(predictedCellFraction)
cellcount_EPICIDOL<-predictedCellFraction
testingCovariates<-truevalues
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,3))

for(k in 1:K) {
    pred = as.numeric(predictedCellFraction[,cellTypes[k]])
    obs = as.numeric(testingCovariates[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = range(c(pred,obs), na.rm =TRUE)

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}



#Per time.point
for (i in levels(as.factor(truevalues$Time.point))){
  par(mfrow = c(3,3))
for(k in 1:K) {
    pred = as.numeric(predictedCellFraction[truevalues$Time.point==i,cellTypes[k]])
    obs = as.numeric(testingCovariates[truevalues$Time.point==i,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = range(c(pred,obs), na.rm =TRUE)

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 1.5, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), "\n",i, sep = ""), 
          cex.main = 1.5)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
}

#Per histology
for (i in levels(as.factor(truevalues$Histological.Group))){
  par(mfrow = c(3,3))
for(k in 1:K) {
    pred = as.numeric(predictedCellFraction[truevalues$Histological.Group==i,cellTypes[k]])
    obs = as.numeric(testingCovariates[truevalues$Histological.Group==i,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = range(c(pred,obs), na.rm =TRUE)

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 1.5, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), "\n",i, sep = ""), 
          cex.main = 1.5)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}
}

#Inverted CD8Tnaive and mem
# k="CD8T_naive"
# k1="CD8T_mem"
# pred = as.numeric(predictedCellFraction[,k1])
#   obs = as.numeric(testingCovariates[,k])
#   R2 = cor(obs, pred, method = "pearson")^2
#   RMSE = sqrt(sum((pred - obs)^2/length(pred)))
# 
#   rangexy = range(c(pred,obs), na.rm =TRUE)
# 
#   par(mar = c(5,5,4,2))
#   plot(obs, pred, cex = 2, col = cellcolors[k], pch = 19, 
#       xlim = rangexy, ylim = rangexy, main = "",
#       xlab = paste0("Observed (%) ", k), ylab = paste0("Predicted (%) ", k1),
#       cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
#   title(main = paste(cellTypes[k], "\nRMSE = ", 
#         round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
#         cex.main = 2)
#   abline(a = 0, b = 1, lwd = 2, lty = "dotted")
# 
# k1="CD8T_naive"
# k="CD8T_mem"
# pred = as.numeric(predictedCellFraction[,k1])
#   obs = as.numeric(testingCovariates[,k])
#   R2 = cor(obs, pred, method = "pearson")^2
#   RMSE = sqrt(sum((pred - obs)^2/length(pred)))
# 
#   rangexy = range(c(pred,obs), na.rm =TRUE)
# 
#   par(mar = c(5,5,4,2))
#   plot(obs, pred, cex = 2, col = cellcolors[k], pch = 19, 
#       xlim = rangexy, ylim = rangexy, main = "",
#       xlab = paste0("Observed (%) ", k), ylab = paste0("Predicted (%) ", k1),
#       cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
#   title(main = paste(cellTypes[k], "\nRMSE = ", 
#         round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
#         cex.main = 2)
#   abline(a = 0, b = 1, lwd = 2, lty = "dotted")




# par(mfrow = c(3,3))
# 
# for (i in celltypes) {
#     
#     
#     if (i=="Neu"){
#         plot(truevalues[,i],
#              cellcount_EPICIDOL[,i], main=paste(i),
#              xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
#              xlim=c(0,80), ylim=c(0,80),
#              pch = 21,  col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
#              cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
#         lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
#         rmse(lm.D9$residuals)
#         summary(lm.D9)$r.squared
#         abline(lm.D9, col=colorcelltype[i])
#         abline(a = 0, b=1, col="black", lwd=1, lty=2)
#         text(10, 65, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
#         text(10, 55, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#         
#     }
#     else if (i=="CD4T"){
#         plot(truevalues[,i],
#              cellcount_EPICIDOL[,i], main=paste(i),
#              xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
#              xlim=c(0,25), ylim=c(0,25),
#              pch = 21,  col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
#              cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
#         lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
#         rmse(lm.D9$residuals)
#         summary(lm.D9)$r.squared
#         abline(lm.D9, col=colorcelltype[i])
#         abline(a = 0, b=1, col="black", lwd=1, lty=2)
#         text(5, 23, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
#         text(5, 21, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#         
#     } else {
#         plot(truevalues[,i],
#              cellcount_EPICIDOL[,i], main=paste(i),
#              xlab="",ylab="",#paste("True", i, "%"), ylab=paste("Estimated", i, "%"),
#              xlim=c(0,20), ylim=c(0,20),
#              pch = 21,  col="black", bg=colorcelltype[i], #pch = 20,#as.numeric(mix),
#              cex=2.5, cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5 )
#         
#         lm.D9 <- lm(cellcount_EPICIDOL[,i]~truevalues[,i] )
#         rmse(lm.D9$residuals)
#         summary(lm.D9)$r.squared
#         abline(lm.D9, col=colorcelltype[i])
#         abline(a = 0, b=1, col="black", lwd=1, lty=2)
#         text(5, 18, substitute(paste(R^2,m), list(m=round(summary(lm.D9)$r.squared[1]*100,1))), cex = 1.5)
#         text(5, 16, substitute(paste("RMSE",m),list(m=round(rmse(lm.D9$residuals)[1],2))), cex = 1.5)
#         
#     }
# }
# 

#mat150<-matcells2
#matFDR<-matFDR
#validation
library(ExperimentHub)
hub <- ExperimentHub()
## snapshotDate(): 2021-05-18
query(hub, "FlowSorted.Blood.EPIC")
## ExperimentHub with 1 record
## # snapshotDate(): 2021-05-18
## # names(): EH1136
## # package(): FlowSorted.Blood.EPIC
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: RGChannelSet
## # $rdatadateadded: 2018-04-20
## # $title: FlowSorted.Blood.EPIC: Illumina Human Methylation data from EPIC o...
## # $description: The FlowSorted.Blood.EPIC package contains Illumina HumanMet...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: tar.gz
## # $sourceurl: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110554
## # $sourcesize: NA
## # $tags: c("ExperimentData", "Homo_sapiens_Data", "Tissue",
## #   "MicroarrayData", "Genome", "TissueMicroarrayData",
## #   "MethylationArrayData") 
## # retrieve record with 'object[["EH1136"]]'
FlowSorted.Blood.EPIC <- hub[["EH1136"]]
## see ?FlowSorted.Blood.EPIC and browseVignettes('FlowSorted.Blood.EPIC') for documentation
## loading from cache
FlowSorted.Blood.EPIC
## class: RGChannelSet 
## dim: 1051815 49 
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(49): 201868500150_R01C01 201868500150_R03C01 ...
##   201870610111_R06C01 201870610111_R07C01
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
# Step 2 separate the reference from the testing dataset if you want to run 
# examples for estimations for this function example

RGsetTargets <- FlowSorted.Blood.EPIC[,
             FlowSorted.Blood.EPIC$CellType == "MIX"]
             
sampleNames(RGsetTargets) <- paste(RGsetTargets$CellType,
                            seq_len(dim(RGsetTargets)[2]), sep = "_")
RGsetTargets
## class: RGChannelSet 
## dim: 1051815 12 
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(12): MIX_1 MIX_2 ... MIX_11 MIX_12
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
#tmp3<-getBeta(preprocessNoob(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
#tmp3a<-getBeta(sesamize(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
tmp3a<-getBeta(preprocessNoob(RGsetTargets))#[,Ext_deconv_pheno$CellType=="WBC"]
phenoIDOLpre<-pData(RGsetTargets)[,c("Bcell","CD4T",  "CD8T", "Neu",  "Mono", "NK" )]
stack<-as.data.frame(phenoIDOLpre)#[phenoIDOLpre$CellType=="MIX",c("Basophil","Bcell","CD4T",  "CD8T", "Eos","Neu",  "Mono", "NK", "Treg" )])
#stack<-stack[1:6,]
sumage<-rowSums(stack)
stack<-round(stack*100/sumage,1)
stack$Gran<-stack$Neu
head(stack)
##       Bcell CD4T CD8T Neu Mono NK Gran
## MIX_1    19    7   19  21   19 15   21
## MIX_2     4   15    4  70    5  2   70
## MIX_3     2    9    6  73   10  0   73
## MIX_4     1   16   11  63    7  2   63
## MIX_5     7   16   29  11   22 15   11
## MIX_6     2   14    8  67    6  3   67
cellcountsbase<-projectCellType_CP(tmp3a[IDOLOptimizedCpGs,], IDOLOptimizedCpGs.compTable, lessThanOne = TRUE)
boxplot(cellcountsbase)

cellcountsbase<-round(cellcountsbase*100,2)
cellcountsbase<-as.data.frame(cellcountsbase)
cellcountsbase$Gran<-cellcountsbase$Neu
cellcountsval<-projectCellType_CP(tmp3a[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)

boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+ cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas



cellTypes<-c("Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK")
cellcolors<-c(  Bcell="#1B9E77", #CD4mem="darkblue", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", Basophil= "#E41A1C",
           Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack
cellcount_EPICIDOL<-cellcountsval

K = length(cellTypes)  # number of cell types

par(mfrow = c(2,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK")
cellcolors<-c(  Bcell="#1B9E77", #CD4mem="darkblue", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", Basophil= "#E41A1C",
           Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack


cellcount_EPICIDOL<-cellcountsbase

K = length(cellTypes)  # number of cell types

par(mfrow = c(2,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

celltype<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv",  "CD8mem", 
                                              "CD8nv", "Eos", 
                                              "Mono", "Neu", "NK", 
                                              "Treg")

epicData2<-FlowSorted.BloodExtended.EPIC[,FlowSorted.BloodExtended.EPIC$CellType%in%celltype]

print("all purity")
## [1] "all purity"
  print(summary(epicData2$purity))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   85.00   88.75   93.00   92.31   96.00   99.00
  print("all meth purity")
## [1] "all meth purity"
 print(summary(epicData2$meth_purity))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   85.70   96.83   98.50   97.63   99.42  100.00
 print(summary(epicData2$meth_purity[epicData2$CellType!="CD8mem"]))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   91.30   97.12   98.65   98.01   99.53  100.00
for (i in levels(as.factor(epicData2$CellType))){
 
  print(i)
 print(summary(epicData2$purity[epicData2$CellType==i]))
 
 print(summary(epicData2$meth_purity[epicData2$CellType==i]))
}
## [1] "Bas"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   85.00   88.08   92.40   91.73   95.06   98.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   92.60   95.90   98.85   97.43   99.40   99.80 
## [1] "Bmem"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   90.00   91.00   91.00   91.83   91.00   97.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   94.80   95.85   97.75   97.43   98.75  100.00 
## [1] "Bnv"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   95.00   95.75   96.50   96.50   97.25   98.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   91.30   94.08   97.50   96.58  100.00  100.00 
## [1] "CD4mem"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   86.01   86.88   87.36   88.82   89.30   94.57 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   93.80   96.72   98.10   97.28   98.65   99.10 
## [1] "CD4nv"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   92.71   93.78   95.05   95.29   96.01   98.89 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   95.20   96.50  100.00   98.34  100.00  100.00 
## [1] "CD8mem"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    85.0    85.0    85.5    86.5    87.0    90.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   85.70   88.25   93.30   92.70   97.75   98.50 
## [1] "CD8nv"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    85.0    86.0    87.0    87.8    89.0    92.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    95.3    96.6    97.2    97.0    97.4    98.5 
## [1] "Eos"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    86.0    87.5    93.0    92.5    98.0    98.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   98.30   98.83   99.30   99.22   99.70  100.00 
## [1] "Mono"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      86      91      91      91      93      94 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   98.10   98.30   99.00   98.84   99.30   99.50 
## [1] "Neu"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   94.00   94.25   95.00   95.33   96.50   97.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   96.40   97.83   99.35   98.70   99.67  100.00 
## [1] "NK"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    95.0    96.5    98.0    97.5    99.0    99.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   97.50   98.40   99.05   98.90   99.55  100.00 
## [1] "Treg"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   89.00   91.00   93.00   92.67   94.50   96.00 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   98.40   98.55   98.70   98.80   99.00   99.30
boxplot(FlowSorted.BloodExtended.EPIC$purity[FlowSorted.BloodExtended.EPIC$CellType%in%celltype]~FlowSorted.BloodExtended.EPIC$CellType[FlowSorted.BloodExtended.EPIC$CellType%in%celltype])

library(ggsignif)
library(ggpubr)
phenobox<-pData(epicData2)
boxdat<-as.data.frame(phenobox[phenobox$CellType!="MIX",c("purity", "CellType", "meth_purity")])
boxdat<-boxdat[order(boxdat$CellType),]

#boxdat$purity<-as.numeric(boxdat$purity)
my_comparisons<- list(c("Bas", "CD8mem"),
                      c("Bmem", "CD8mem"),
                      c("Bnv", "CD8mem"),
                      c("CD4mem", "CD8mem"),
                      c("CD4nv", "CD8mem"),
                      c("CD8nv", "CD8mem"),
                      c("Eos", "CD8mem"),
                      c("Mono", "CD8mem"),
                      c("Neu", "CD8mem"),
                      c("NK", "CD8mem"),
                      c("Treg", "CD8mem"))#,
#c("CD8T", "NK"),
#c("Neu", "Bcell"))
#c("Bcell",  "CD4T", "CD8T", "Mono", "Neu", "NK")
colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta",  
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",  
CD8nv = "#7570B3", CD8mem = "maroon", 
 NK = "#E6AB02")
p<-ggboxplot(boxdat, x = "CellType", y = "purity",
          color = "CellType", palette = colors1,
          xlab = "Cell type", ylab = "Cell sorting purity %") +
    #stat_compare_means(comparisons = my_comparisons, label.y = c(105, 103, 101, 102, 104,
     #                                                            105, 103, 101, 102, 104, 105))+
    stat_compare_means(label.y = 101) + theme(legend.position="none")
q<-ggboxplot(boxdat, x = "CellType", y = "meth_purity",
          color = "CellType", palette = colors1,
          xlab = "Cell type", ylab = "Estimated DNA methylation purity %") +
    #stat_compare_means(comparisons = my_comparisons, label.y = c(105, 103, 101, 102, 104,
     #                                                            105, 103, 101, 102, 104, 105))+
    stat_compare_means(label.y = 101) + theme(legend.position="none")
multiplot(p,q,cols = 1)

pheno2<-as.data.frame(pData(Ext_deconv_rgset)) 

#General QC and supplementary figures
#Genetic heatmap control SNPs
library(pheatmap)


pheno2$CellType<-ifelse(pheno2$CellType=="CD8T_naive", "CD8nv",
                         ifelse(pheno2$CellType=="Basophil", "Bas",
                                ifelse(pheno2$CellType=="CD8T_EM", "CD8mem",pheno2$CellType)))

colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta",  
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",  
CD8nv = "#7570B3", CD8mem = "maroon", 
 NK = "#E6AB02")
pheno2$meth_ethnicity2<-ifelse(pheno2$meth_ethnicity=="ASIAN", "Asian", 
                               ifelse(pheno2$meth_ethnicity=="WHITE", "White", 
                                      "Black"))
pheno2<-pheno2[pheno2$CellType%in%names(colors1) & pheno2$include!="No",]
ann_colors = list(CellType=colors1,
                  Sex = c(F="red", M="blue"),
                  Ethnicity=c("African-American"='black', "East-Asian"="firebrick", "Indo-European"="lightgray", Mixed="green"), SeSaMe_ethnicity=c("Black"='black', "Asian"="firebrick", "White"="lightgray"))

#levels(pheno2$CellType)<-c( "CD4T" , "CD8T" , "Bcell", "NK", "Neu" , "Mono" , "MIX"   )
annotation_row<-data.frame(CellType=pheno2$CellType, Sex=pheno2$Sex, Ethnicity= pheno2$Ethnicity_wide, 
                           SeSaMe_ethnicity=pheno2$meth_ethnicity2,
                           #factor(pheno2$CellType, levels = c( "CD4T" , "CD8T" , "Bcell", "NK", "Neu" , "Mono" , "MIX"   ))
                           row.names = rownames(pheno2))
pheatmap(rbind(getSnpBeta(Ext_deconv_rgset[,colnames(Ext_deconv_rgset)%in%rownames(pheno2)]),
               Ext_deconv_SNP[,colnames(Ext_deconv_SNP)%in%rownames(pheno2)]), annotation_col = annotation_row, show_colnames = F, show_rownames = F, annotation_colors = ann_colors)

#Table one
library(tableone)
factorvars2<-c("CellType","Sex", "bmi_clas", "Ethnicity_wide", "Ethnic_self", "smoker")
design5<-as.data.frame(pheno2)
design5[factorvars2] <- lapply(design5[factorvars2], factor)

contvars<-c("Age","weight_kg", "height_cm", "bmi", "purity")
design5[contvars] <- lapply(design5[contvars], as.numeric)

tableOne <- CreateTableOne(vars = c(factorvars2, contvars), data = design5)

print(tableOne,  quote = TRUE)
##                                                          ""
##  ""                                                       "Overall"       
##   "n"                                                     "    56"        
##   "CellType (%)"                                          "    "          
##   "   Bas"                                                "     6 (10.7) "
##   "   Bmem"                                               "     6 (10.7) "
##   "   Bnv"                                                "     4 ( 7.1) "
##   "   CD4mem"                                             "     4 ( 7.1) "
##   "   CD4nv"                                              "     5 ( 8.9) "
##   "   CD8mem"                                             "     4 ( 7.1) "
##   "   CD8nv"                                              "     5 ( 8.9) "
##   "   Eos"                                                "     4 ( 7.1) "
##   "   Mono"                                               "     5 ( 8.9) "
##   "   Neu"                                                "     6 (10.7) "
##   "   NK"                                                 "     4 ( 7.1) "
##   "   Treg"                                               "     3 ( 5.4) "
##   "Sex = M (%)"                                           "    41 (73.2) "
##   "bmi_clas (%)"                                          "    "          
##   "   Normal"                                             "    17 (30.4) "
##   "   Obese"                                              "    14 (25.0) "
##   "   Overweight"                                         "    24 (42.9) "
##   "   Underweight"                                        "     1 ( 1.8) "
##   "Ethnicity_wide (%)"                                    "    "          
##   "   African-American"                                   "     8 (14.3) "
##   "   East-Asian"                                         "     4 ( 7.1) "
##   "   Indo-European"                                      "    38 (67.9) "
##   "   Mixed"                                              "     6 (10.7) "
##   "Ethnic_self (%)"                                       "    "          
##   "   African-American"                                   "     5 ( 8.9) "
##   "   African American"                                   "     2 ( 3.6) "
##   "   African American/ Italian/ French/ Native American" "     1 ( 1.8) "
##   "   African American/Latino or Hispanic"                "     1 ( 1.8) "
##   "   Cauc/Mex/AA"                                        "     1 ( 1.8) "
##   "   Caucasian"                                          "    35 (62.5) "
##   "   Chinese"                                            "     3 ( 5.4) "
##   "   Hispanic"                                           "     2 ( 3.6) "
##   "   Irish/AA"                                           "     1 ( 1.8) "
##   "   Jap/Chinese"                                        "     1 ( 1.8) "
##   "   Mexican"                                            "     1 ( 1.8) "
##   "   non-hispanic white"                                 "     1 ( 1.8) "
##   "   Persian"                                            "     1 ( 1.8) "
##   "   Portuguese"                                         "     1 ( 1.8) "
##   "smoker = Yes (%)"                                      "    14 (25.5) "
##   "Age (mean (SD))"                                       " 32.18 (11.24)"
##   "weight_kg (mean (SD))"                                 " 85.11 (17.14)"
##   "height_cm (mean (SD))"                                 "175.59 (10.69)"
##   "bmi (mean (SD))"                                       " 27.79 (6.18)" 
##   "purity (mean (SD))"                                    " 92.31 (4.39)"
#Principal component regression analysis
cov<-data.frame(CellType=factor(pheno2$CellType),
    sex=factor(pheno2$Sex),
    slide=factor(pheno2$Slide),
    #array=factor(pheno2$Array),
    Ethnicity=pheno2$Ethnicity_wide,
    Age=pheno2$Age,
    weight=pheno2$weight_kg,
    height=pheno2$height_cm,
    BMI=pheno2$bmi,
    Smoker=factor(pheno2$smoker),
    purity=pheno2$purity)
#Only those analyzed
betas2<-Ext_deconv_betas3[,colnames(Ext_deconv_betas3)%in%rownames(pheno2)]
betas3<-betas2[complete.cases(betas2),]
#Eliminate 0 variance
which(apply(betas3, 1, var)==0)#2 probes 0 variance
## named integer(0)
betas3<-betas3[apply(betas3, 1, var) != 0,]
dim(betas3)#[1] 866089     49 2 probes deleted
## [1] 675992     56
pcrplot(betas3, cov, npc=20)
## Analysis is running, please wait...!
## svdscreeplot.jpg was plotted 
## Top  20  principal components can explain  86.78947 % of data 
##     variation
## pcr_diag.jpg was plotted
npc=50
npc <- min(ncol(betas3), npc)
svd <- prcomp(t(betas3), center = TRUE, scale = TRUE, retx = TRUE,
              na.action = "na.omit")
## Warning: In prcomp.default(t(betas3), center = TRUE, scale = TRUE, retx = TRUE, 
##     na.action = "na.omit") :
##  extra argument 'na.action' will be disregarded
screeplot(svd, npc, type = "barplot")

eigenvalue <- svd[["sdev"]]^2
prop <- (sum(eigenvalue[1:npc])/sum(eigenvalue)) * 100
cat("Top ", npc, " principal components can explain ", prop,
    "% of data \n    variation", "\n")
## Top  50  principal components can explain  98.76665 % of data 
##     variation
for(i in 1:20){
  prop <- (sum(eigenvalue[i])/sum(eigenvalue)) * 100
prop1 <- (sum(eigenvalue[1:i])/sum(eigenvalue)) * 100
print(cat("Top ", i, " principal components can explain ", prop1,
    "% of data \n    variation", "\n the",i, "PC explains", prop , 
    "% of data \n    variation"))
}
## Top  1  principal components can explain  33.55543 % of data 
##     variation 
##  the 1 PC explains 33.55543 % of data 
##     variationNULL
## Top  2  principal components can explain  49.61665 % of data 
##     variation 
##  the 2 PC explains 16.06122 % of data 
##     variationNULL
## Top  3  principal components can explain  57.22655 % of data 
##     variation 
##  the 3 PC explains 7.609905 % of data 
##     variationNULL
## Top  4  principal components can explain  64.50633 % of data 
##     variation 
##  the 4 PC explains 7.279777 % of data 
##     variationNULL
## Top  5  principal components can explain  68.24072 % of data 
##     variation 
##  the 5 PC explains 3.734396 % of data 
##     variationNULL
## Top  6  principal components can explain  71.18448 % of data 
##     variation 
##  the 6 PC explains 2.94376 % of data 
##     variationNULL
## Top  7  principal components can explain  73.33189 % of data 
##     variation 
##  the 7 PC explains 2.147408 % of data 
##     variationNULL
## Top  8  principal components can explain  75.22846 % of data 
##     variation 
##  the 8 PC explains 1.896569 % of data 
##     variationNULL
## Top  9  principal components can explain  76.87958 % of data 
##     variation 
##  the 9 PC explains 1.651121 % of data 
##     variationNULL
## Top  10  principal components can explain  78.2127 % of data 
##     variation 
##  the 10 PC explains 1.333117 % of data 
##     variationNULL
## Top  11  principal components can explain  79.43692 % of data 
##     variation 
##  the 11 PC explains 1.224222 % of data 
##     variationNULL
## Top  12  principal components can explain  80.53005 % of data 
##     variation 
##  the 12 PC explains 1.093126 % of data 
##     variationNULL
## Top  13  principal components can explain  81.58459 % of data 
##     variation 
##  the 13 PC explains 1.054542 % of data 
##     variationNULL
## Top  14  principal components can explain  82.52543 % of data 
##     variation 
##  the 14 PC explains 0.9408381 % of data 
##     variationNULL
## Top  15  principal components can explain  83.40911 % of data 
##     variation 
##  the 15 PC explains 0.8836813 % of data 
##     variationNULL
## Top  16  principal components can explain  84.20467 % of data 
##     variation 
##  the 16 PC explains 0.795563 % of data 
##     variationNULL
## Top  17  principal components can explain  84.89496 % of data 
##     variation 
##  the 17 PC explains 0.6902847 % of data 
##     variationNULL
## Top  18  principal components can explain  85.56438 % of data 
##     variation 
##  the 18 PC explains 0.6694285 % of data 
##     variationNULL
## Top  19  principal components can explain  86.21426 % of data 
##     variation 
##  the 19 PC explains 0.6498807 % of data 
##     variationNULL
## Top  20  principal components can explain  86.78947 % of data 
##     variation 
##  the 20 PC explains 0.5752072 % of data 
##     variationNULL
p <- ENmix:::lmmatrix(svd$x[, 1:npc], cov)
yaxis <- colnames(p)
plotp2<-function ( p, yaxis, xmax, title)
{
    par(mar = c(5, 4.2, 4, 2) + 0.1)
    plot(1, xlim = c(0, xmax), ylim = c(0, length(yaxis) + 1),
         type = "n", bty = "n", axes = FALSE, xlab = "Principal Component",
         ylab = "", main = title)
    axis(1, at = c(1:xmax), pos = 0.5, las = 1, lwd = 3)
    for (i in 1:length(yaxis)) {
        text(0.3, i, yaxis[i], xpd = TRUE, adj = 1)
    }
    for (i in 1:ncol(p)) {
        for (j in 1:nrow(p)) {
            pp <- p[j, i]
            colcode <- "white"
            if (pp <= 1e-09) {
                colcode = "darkred"
            }
            else if (pp <= 1e-04) {
                colcode = "red"
            }
            else if (pp <= 0.01) {
                colcode = "orange"
            }
            else if (pp <= 0.05) {
                colcode = "pink"
            }
            polygon(c(j - 0.5, j - 0.5, j + 0.5, j + 0.5), c(i -
                                                                 0.5, i + 0.5, i + 0.5, i - 0.5), col = colcode,
                    border = NA)
        }
    }
    legend("topright", c("<0.05", "<0.01", "<10E-5", "<10E-10"),
           col = c("pink", "orange", "red", "darkred"), pch = 15,
           pt.cex = 2, bty = "o", horiz = TRUE, xpd = TRUE)
}
plotp2(p, yaxis, 20, title = "Principal Component Regression Analysis")

cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
MIXbetas<-FlowSorted.BloodExtended.EPIC[, FlowSorted.BloodExtended.EPIC$CellType=="MIX"]
trainingBetas<-getBeta(MIXbetas)[,c(1:6)]
trainingCovariates<-as.data.frame(pData(MIXbetas))[c(1:6),cellTypes]*100
testingBetas<-getBeta(MIXbetas)[,c(7:12)]
testingCovariates<-as.data.frame(pData(MIXbetas))[c(7:12),cellTypes]*100

groups<-c(Bas = "#E41A1C",  
          Bmem = "magenta", Bnv = "#1B9E77", 
CD4mem = "darkblue", CD4nv = "#D95F02", 
CD8mem = "maroon", 
CD8nv = "#7570B3", Eos = "#3E8E93",  
Mono = "#E7298A", 
Neu = "#66A61E", NK = "#E6AB02", 
Treg = "black")#, 
#WBC = "lightblue")

# for (i in c(seq(from = 250, to = 1050, by =50), seq(from = 1100, to = 2000, by =100),
#             seq(from = 2000, to = 3000, by =500))){
#   set.seed(1234)
#   assign(paste0("IDOLopt_fix_", i), IDOLoptimize2(candObject, trainingBetas, trainingCovariates,
#                       libSize = i, maxIt = 500, numCores = 5))
# }
# 
# 
# save(IDOLopt_fix_250, IDOLopt_fix_300, IDOLopt_fix_400, IDOLopt_fix_450, IDOLopt_fix_500, IDOLopt_fix_550, 
#      IDOLopt_fix_600, IDOLopt_fix_650, IDOLopt_fix_700, IDOLopt_fix_750, IDOLopt_fix_800, IDOLopt_fix_850,
#      IDOLopt_fix_900, IDOLopt_fix_950, 
#      IDOLopt_fix_1000, IDOLopt_fix_1050, IDOLopt_fix_1100, IDOLopt_fix_1200, IDOLopt_fix_1300, IDOLopt_fix_1400,
#      IDOLopt_fix_1500, IDOLopt_fix_1600, IDOLopt_fix_1700, IDOLopt_fix_1800, IDOLopt_fix_1900, IDOLopt_fix_2000,
#      IDOLopt_fix_2500, IDOLopt_fix_3000, file="IDOLopt_fix_EPIC.rda")


#IDOLOptimizedCpGsBloodExtended<-IDOLOptimizedCpGsBloodExtended

#Raw selection without filtering is unfair as the default universe is different
minfipickcomp<- minfi:::pickCompProbes(preprocessRaw(FlowSorted.BloodExtended.EPIC),cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg"),numProbes = 50, probeSelect = "auto")


table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
## 
## FALSE  TRUE 
##  1053   147
#USe the same universe
minfipickcomp2<- minfi:::pickCompProbes(preprocessRaw(FlowSorted.BloodExtended.EPIC)[rownames(Ext_deconv_betas3),],cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg"),numProbes = 50, probeSelect = "auto")


table(rownames(minfipickcomp2$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
## 
## FALSE  TRUE 
##  1020   180
# for (i in c(seq(from = 250, to = 1050, by =50), seq(from = 1100, to = 2000, by =100),
#             seq(from = 2000, to = 3000, by =500))){
#   load(paste0("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Fix/IDOL optimized DMR library_", i, ".RData"))
#   assign(paste0("IDOLopt_fix_", i), get("IDOL.optim.coefEsts"))
# }

# load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/IDOLopt_fix_EPIC.rda")
# 
# matrixEPICIDOL<-matrix(data = NA, nrow=28, ncol = 2, 
#                       dimnames = list(c(paste0("CpG_",
#                                                c(250,300,seq(from = 400, to = 1100, by =50),
#                                                  seq(from = 1200, to = 2000, by =100),
#                                                  2500,3000))), 
#                                         c("RMSE", "R2")))
# for (i in  c(seq(from = 400, to = 1100, by =50),
#                                                  seq(from = 1200, to = 2000, by =100),
#                                                  2500,3000)){
#   IDOLtmp<-get(paste0("IDOLopt_fix_",i))
#   for (j in 1:100){
#     if (j==1){
#       RMSE=RMSE.null=IDOLtmp$RMSE[j]
#       R2=R2.null=IDOLtmp$R2[j]
#     }else{
#       if (IDOLtmp$RMSE[j] <= RMSE.null & IDOLtmp$R2[j] >= R2.null) {
#             RMSE.null = IDOLtmp$RMSE[j]
#             R2.null = IDOLtmp$R2[j]
#             #print(paste("Iteration: ", j, " RMSE=", 
#             #    round(IDOLtmp$RMSE[j], 3), "; R2=", round(IDOLtmp$R2[j], 3), 
#             #    sep = ""))
#         }
#     }
#   }
#   print(paste("The Average RMSE = ", round(RMSE.null, 
#         3), " and R2 = ", round(R2.null, 3), " for the IDOL Optimized Library ",i, 
#         sep = ""))
#   matrixEPICIDOL[paste0("CpG_",i), 1]<-RMSE.null
#   matrixEPICIDOL[paste0("CpG_",i), 2]<-R2.null
# }

#####################################################################################
# Step 3: Using the IDOL optimized library identified in Step 2, deconvolute the 
# training data set and compare the observed versus predicted cell proportions 
#####################################################################################

# 3a: subset the training data to consist of the IDOL optimized library identified
# from Step 2
# trainingBetas.sub = trainingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# testingBetas.sub=testingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]


trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended,]
testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable

colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
             as.numeric(predictedCellFraction1[,cellTypes[k]]))
    obs = c(as.numeric(trainingCovariates[,cellTypes[k]]), 
            as.numeric(testingCovariates[,cellTypes[k]]))
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]],  
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "", ylab = "",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
        par(new=T)
    plot(obs[6:12], pred[6:12], cex = 2, 
         pch = 22,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    #axis(side = 1)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

#Comparison with pickcompprobes (minfi)
# 3a: subset the training data 
trainingBetas.sub = trainingBetas[rownames(minfipickcomp$coefEsts),]
testingBetas.sub=testingBetas[rownames(minfipickcomp$coefEsts),]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = minfipickcomp$coefEsts
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
             as.numeric(predictedCellFraction1[,cellTypes[k]]))
    obs = c(as.numeric(trainingCovariates[,cellTypes[k]]), 
            as.numeric(testingCovariates[,cellTypes[k]]))
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]],  
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "", ylab = "",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
        par(new=T)
    plot(obs[6:12], pred[6:12], cex = 2, 
         pch = 22,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    #axis(side = 1)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

#minfi subset
#Comparison with pickcompprobes (minfi)
# 3a: subset the training data 
trainingBetas.sub = trainingBetas[rownames(minfipickcomp2$coefEsts),]
testingBetas.sub=testingBetas[rownames(minfipickcomp2$coefEsts),]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = minfipickcomp$coefEsts
colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
             as.numeric(predictedCellFraction1[,cellTypes[k]]))
    obs = c(as.numeric(trainingCovariates[,cellTypes[k]]), 
            as.numeric(testingCovariates[,cellTypes[k]]))
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]],  
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "", ylab = "",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
        par(new=T)
    plot(obs[6:12], pred[6:12], cex = 2, 
         pch = 22,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    #axis(side = 1)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

#450k
#load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/IDOL optimized DMR library_1500.RData")


# 3a: subset the training data 
# trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = FlowSorted.BloodExtended.450klegacy.compTable


trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended450k,]
testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended450k,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.450klegacy.compTable


colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = c(as.numeric(predictedCellFraction[,cellTypes[k]]),
             as.numeric(predictedCellFraction1[,cellTypes[k]]))
    obs = c(as.numeric(trainingCovariates[,cellTypes[k]]), 
            as.numeric(testingCovariates[,cellTypes[k]]))
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]],  
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "", ylab = "",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
        par(new=T)
    plot(obs[6:12], pred[6:12], cex = 2, 
         pch = 22,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    #axis(side = 1)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

#Comparison vs true values
#pheno<-as.data.frame(pData(RGsetTargets)) 
#pheno$clas<-paste("Method", substr(pheno$Subject.ID, 1,1))


#Remember the order for the colors look for the order if the results do not match the figure in the paper.
#Figure 1

par(mfrow=c(1,2))
barplot(t(trainingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:6), xlab = "Training")
barplot(t(testingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:6), #legend.text = cellTypes, 
        xlab = "Testing")

#Only training
####################################################################################
# Step 3: Using the IDOL optimized library identified in Step 2, deconvolute the 
# training data set and compare the observed versus predicted cell proportions 
#####################################################################################

# 3a: subset the training data to consist of the IDOL optimized library identified
# from Step 2
# trainingBetas.sub = trainingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# testingBetas.sub=testingBetas[IDOLopt_fix_1200[["IDOL Optimized Library"]],]
# # 3b: extract the coefEsts object from the Step 2 results
# coefEsts = IDOLopt_fix_1200[["IDOL Optimized CoefEsts"]]


trainingBetas.sub = trainingBetas[IDOLOptimizedCpGsBloodExtended,]
#testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]
# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable

colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)
#predictedCellFraction1 = round(projectWBCnew(testingBetas.sub, coefEsts)*100,1)
# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = as.numeric(predictedCellFraction[,cellTypes[k]])
             #as.numeric(predictedCellFraction1[,cellTypes[k]]))
    obs = as.numeric(trainingCovariates[,cellTypes[k]]) 
            #as.numeric(testingCovariates[,cellTypes[k]]))
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    # plot(obs[1:6], pred[1:6], cex = 2, 
    #      pch = 21,  col="black", bg= cellcolors[cellTypes[k]],  
    #   xlim = rangexy, ylim = rangexy, main = "",
    #   xlab = "", ylab = "",
    #   cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE, xaxt='n', yaxt='n')
    #   par(new=T)
    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

#Only testing

testingBetas.sub=testingBetas[IDOLOptimizedCpGsBloodExtended,]

coefEsts = FlowSorted.BloodExtended.EPIC.compTable

colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)

# 3d: do a visual depiction of the results (run all of the code below at once)
cellcolors = groups#brewer.pal(6,"Set2")
cellTypes<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
"Eos", "Mono", "Neu", "NK", "Treg")
K = length(cellTypes)  # number of cell types

par(mfrow = c(3,4))
for(k in 1:K) {
    pred = as.numeric(predictedCellFraction1[,cellTypes[k]])
    obs = as.numeric(testingCovariates[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    plot(obs[1:6], pred[1:6], cex = 2, 
         pch = 22,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
}

#Validation
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Validation/Raw_data_mixval.RData")
load(paste0(workdir,"/Validation/Validation_deconv_sesame.RData"))

cellcolors<-c(Bas = "#E41A1C", 
          Bmem = "magenta", Bnv = "#1B9E77", 
CD4mem = "darkblue", CD4nv = "#D95F02", 
CD8mem = "maroon", 
CD8nv = "#7570B3", Eos = "#3E8E93", 
Mono = "#E7298A", 
Neu = "#66A61E", NK = "#E6AB02", 
Treg = "black")

cellTypes<-names(cellcolors)


trainingCovariates<-as.data.frame(pData(Validation_deconv_rgset))[Validation_deconv_rgset$CellType=="MIX",cellTypes]*100
trainingCovariates<-round(trainingCovariates,1)




#noob minfi
trainingBetas.sub = getBeta(MsetEx)[IDOLOptimizedCpGsBloodExtended,
                                                     Validation_deconv_rgset$CellType=="MIX"]


# 3b: extract the coefEsts object from the Step 2 results
coefEsts = FlowSorted.BloodExtended.EPIC.compTable

#colnames(coefEsts)<-c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
#"Eos", "Mono", "Neu", "NK", "Treg")
# 3c: predict cell composistion
predictedCellFraction = round(projectWBCnew(trainingBetas.sub, coefEsts)*100,1)

# 3d: do a visual depiction of the results (run all of the code below at once)

K = length(cellTypes)  # number of cell types



#Artificial mixtures
par(mfrow = c(3,4))
for(k in 1:K) {
    pred = as.numeric(predictedCellFraction[,cellTypes[k]])
    obs = as.numeric(trainingCovariates[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))

    rangexy = c(0,25)#max(c(pred,obs)))#range(c(pred,obs))

    #par(mar = c(5,5,4,2))
    
    plot(obs, pred, cex = 2, 
         pch = 21,  col="black", bg= cellcolors[cellTypes[k]], 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.2, cex.axis = 1.3, frame.plot = FALSE)
    #axis(side = 1)
    title(main = substitute(paste(o, " RMSE = ", 
          m, "; ",R^2,"= ", n, sep = ""), 
          list(o=cellTypes[k], m=round(RMSE, 2), n=round(R2, 2))), 
          cex.main = 1.5)
    lm.D9 <- lm(pred~obs)
    abline(lm.D9, col=cellcolors[cellTypes[k]])
    abline(a = 0, b=1, col="black", lwd=1, lty=2)
    
  
}

par(mfrow=c(1,1))
barplot(t(trainingCovariates), col= cellcolors[cellTypes], names.arg = seq(1:12), xlab = "Replication")

#IDOLopt_EM_2500$`IDOL Optimized CoefEsts`
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM", 
              "CD8mem", 
"CD8nv", "Eos", #"MIX", 
"Mono", "Neu", "NK", #"NKT", 
"Treg")#,"WBC")
coefEsts = FlowSorted.BloodExtended.EPIC.compTable

#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1200   57
matcells5<-cbind.data.frame(coefEsts, annot)
write.csv(matcells5, file="Suppl_table_5.csv")

#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)

matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))

matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
                       ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
                              matcells5$CGI))

matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
## 
##  Island N_Shelf N_Shore OpenSea S_Shelf S_Shore 
##      50      49     111     866      46      78
table(matcells5$Function)
## 
##    TSS1500     TSS200    1stExon    ExonBnd      5'UTR       Body      3'UTR 
##        104         32         15         14        128        569         35 
## Intergenic 
##        303
table(matcells5$Enhancer)
## 
##   No  Yes 
## 1061  139
table(matcells5$DHS)
## 
##  No Yes 
## 344 856
table(matcells5$OpenChromatin)
## 
##   No  Yes 
## 1065  135
table(matcells5$TFBS)
## 
##   No  Yes 
## 1034  166
table(matcells5$Methyl450_Loci)
## 
## TRUE 
##  459
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
                           OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
                           
                           row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
    CellType = groups, #CGI=c(No="white", Yes="darkgray"), 
    Enhancer=c(No="white", Yes="darkgray"), 
    DHS=c(No="white", Yes="darkgray"),
                           OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
    HM450k=c(No="white", Yes="darkgray"), 
    Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
            "1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
            "5'UTR"="#E6F598","Body"="#ABDDA4",
            "3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
    CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
       "Shelves"="#4DAF4A", "OpenSea"="#984EA3")
) 


try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
                  annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "L-DMR IDOL EPIC library", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))

referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
table(referencePd$CellType)
## 
##   Bmem    Bnv CD4mem  CD4nv    Eos   Mono    Neu     NK   Treg 
##      6      6      7      6      4      6      6      6      4
referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
                             ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
                                    ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
                  annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "L-DMR IDOL EPIC library", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))

#IDOLopt_EM_2500$`IDOL Optimized CoefEsts`
cellTypes = c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", #"CD8T_CM", 
              "CD8mem", 
"CD8nv", "Eos", #"MIX", 
"Mono", "Neu", "NK", #"NKT", 
"Treg")#,"WBC")
coefEsts = minfipickcomp$coefEsts


#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1200   57
matcells5<-cbind.data.frame(coefEsts, annot)
#write.csv(matcells5, file="Suppl table 7.csv")

#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)

matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))

matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
                       ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
                              matcells5$CGI))

matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
## 
##  Island N_Shelf N_Shore OpenSea S_Shelf S_Shore 
##      62      46      77     902      44      69
table(matcells5$Function)
## 
##    TSS1500     TSS200    1stExon    ExonBnd      5'UTR       Body      3'UTR 
##         83         40         28          5        118        589         31 
## Intergenic 
##        306
table(matcells5$Enhancer)
## 
##   No  Yes 
## 1006  194
table(matcells5$DHS)
## 
##  No Yes 
## 332 868
table(matcells5$OpenChromatin)
## 
##   No  Yes 
## 1087  113
table(matcells5$TFBS)
## 
##   No  Yes 
## 1053  147
table(matcells5$Methyl450_Loci)
## 
## TRUE 
##  517
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
                           OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
                           
                           row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
    CellType = groups, #CGI=c(No="white", Yes="darkgray"), 
    Enhancer=c(No="white", Yes="darkgray"), 
    DHS=c(No="white", Yes="darkgray"),
                           OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
    HM450k=c(No="white", Yes="darkgray"), 
    Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
            "1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
            "5'UTR"="#E6F598","Body"="#ABDDA4",
            "3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
    CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
       "Shelves"="#4DAF4A", "OpenSea"="#984EA3")
) 



try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
                  annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "L-DMR pickcompprobes EPIC library", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))

referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
table(referencePd$CellType)
## 
##   Bmem    Bnv CD4mem  CD4nv    Eos   Mono    Neu     NK   Treg 
##      6      6      7      6      4      6      6      6      4
referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
                             ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
                                    ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
                  annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "L-DMR pickcompprobes EPIC library", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))

#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[IDOLOptimizedCpGs,]
dim(annot)
## [1] 450  57
matcells5<-cbind.data.frame(IDOLOptimizedCpGs.compTable, annot)
#write.csv(matcells5, file="Minfi_library_1200_EPIC.csv")

#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)

matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))

matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
## 
##  Island N_Shelf N_Shore OpenSea S_Shelf S_Shore 
##      10      20      30     342      15      33
table(matcells5$Function)
## 
##    TSS1500     TSS200    1stExon    ExonBnd      5'UTR       Body      3'UTR 
##         38         11          7          1         46        209          9 
## Intergenic 
##        129
table(matcells5$Enhancer)
## 
##  No Yes 
## 380  70
table(matcells5$DHS)
## 
##  No Yes 
## 122 328
table(matcells5$OpenChromatin)
## 
##  No Yes 
## 407  43
table(matcells5$TFBS)
## 
##  No Yes 
## 391  59
table(matcells5$Methyl450_Loci)
## 
## TRUE 
##  149
#OVerlap
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended)
## 
## FALSE  TRUE 
##  1053   147
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGsBloodExtended450k)
## 
## FALSE  TRUE 
##  1111    89
table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)
## 
## FALSE  TRUE 
##  1143    57
table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended)
## 
## FALSE  TRUE 
##   407    43
table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended450k)
## 
## FALSE  TRUE 
##   423    27
#table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)

#table(IDOLOptimizedCpGs %in% IDOLOptimizedCpGsBloodExtended)
table(IDOLOptimizedCpGsBloodExtended %in% IDOLOptimizedCpGsBloodExtended450k)
## 
## FALSE  TRUE 
##   870   330
#table(rownames(minfipickcomp$coefEsts)%in%IDOLOptimizedCpGs)
load("D:/OneDrive/OneDrive - Dartmouth College/Fetal adult/Aging dataset2.RData")

coefEsts = FlowSorted.BloodExtended.450klegacy.compTable


#load(file = "I:/Dropbox (Christensen Lab)/manifest EPIC/annotationEPICb5.rda")
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated.rda")
annot<-annot[rownames(coefEsts),]
dim(annot)
## [1] 1500   57
matcells5<-cbind.data.frame(coefEsts, annot)
#write.csv(matcells5, file="Suppl table 6.csv")

#Extract the first reference group
anno4<-strsplit(matcells5$UCSC_RefGene_Group, ";")
#character 0 should be transformed to NA or the lapply will fail
#anno5a<-lapply(anno4, function(x) if(identical(x, character(0))) NA_character_ else x)
#Replace for "Intergenic"
anno5a<-lapply(anno4, function(x) if(identical(x, NA_character_)) "Intergenic" else x)
anno5<-lapply(anno5a, `[[`, 1)

matcells5$Function = factor(unlist(anno5), levels= c("TSS1500", "TSS200", "1stExon","ExonBnd", "5'UTR","Body", "3'UTR", "Intergenic"))

matcells5$CGI<-ifelse(matcells5$Relation_to_UCSC_CpG_Island=="","OpenSea",matcells5$Relation_to_UCSC_CpG_Island)
matcells5$CGI2<-ifelse(matcells5$CGI=="N_Shelf"| matcells5$CGI=="S_Shelf","Shelves",
                       ifelse(matcells5$CGI=="N_Shore"| matcells5$CGI=="S_Shore","Shores",
                              matcells5$CGI))

matcells5$Enhancer<-ifelse(matcells5$Phantom5_Enhancers=="", "No", "Yes")
matcells5$DHS<-ifelse(matcells5$DNase_Hypersensitivity_NAME=="", "No", "Yes")
matcells5$OpenChromatin<-ifelse(matcells5$OpenChromatin_NAME=="", "No", "Yes")
matcells5$TFBS<-ifelse(matcells5$TFBS_NAME=="", "No", "Yes")
table(matcells5$CGI)
## 
##  Island N_Shelf N_Shore OpenSea S_Shelf S_Shore 
##     122     115     218     777      98     170
table(matcells5$Function)
## 
##    TSS1500     TSS200    1stExon    ExonBnd      5'UTR       Body      3'UTR 
##        192         81         31          0        169        650         77 
## Intergenic 
##        300
table(matcells5$Enhancer)
## 
##   No  Yes 
## 1424   76
table(matcells5$DHS)
## 
##   No  Yes 
##  499 1001
table(matcells5$OpenChromatin)
## 
##   No  Yes 
## 1326  174
table(matcells5$TFBS)
## 
##   No  Yes 
## 1307  193
table(matcells5$Methyl450_Loci)
## 
## TRUE 
## 1500
matcells5$Methyl450_Loci2<-ifelse(matcells5$Methyl450_Loci==TRUE, "Yes", "No")
annotation_row= data.frame(CGI=matcells5$CGI2, Function= matcells5$Function, Enhancer=matcells5$Enhancer, DHS=matcells5$DHS,
                           OpenChromatin=matcells5$OpenChromatin, TFBS=matcells5$TFBS, HM450k=matcells5$Methyl450_Loci2,
                           
                           row.names = rownames(coefEsts))
annotation_col=data.frame(CellType=colnames(coefEsts), row.names = colnames(coefEsts))
groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")
ann_colors = list(
    CellType = groups, #CGI=c(No="white", Yes="darkgray"), 
    Enhancer=c(No="white", Yes="darkgray"), 
    DHS=c(No="white", Yes="darkgray"),
                           OpenChromatin=c(No="white", Yes="darkgray"), TFBS=c(No="white", Yes="darkgray"),
    HM450k=c(No="white", Yes="darkgray"), 
    Function= c("TSS1500"="#D53E4F", "TSS200"="#F46D43",
            "1stExon"= "#FDAE61","ExonBnd"="#FEE08B",
            "5'UTR"="#E6F598","Body"="#ABDDA4",
            "3'UTR"= "#66C2A5", "Intergenic"="#3288BD"),
    CGI<-c("Island"= "#E41A1C", "Shores"="#377EB8",
       "Shelves"="#4DAF4A", "OpenSea"="#984EA3")
) 


try(pheatmap(coefEsts,color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
                  annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "L-DMRs IDOL legacy  450K", cluster_rows = T, show_rownames = F))#, labels_col = combined$CellType))

annotation_col=data.frame(CellType=referencePd$CellType, row.names = rownames(referencePd))


Ext_deconv_pheno$CellType<-ifelse(Ext_deconv_pheno$CellType=="Basophil", "Bas",
                             ifelse(Ext_deconv_pheno$CellType=="CD8T_EM", "CD8mem",
                                    ifelse(Ext_deconv_pheno$CellType=="CD8T_naive", "CD8nv", Ext_deconv_pheno$CellType)))

referencePd<-Ext_deconv_pheno[Ext_deconv_pheno$CellType%in%cellTypes,]
# referencePd$CellType<-ifelse(referencePd$CellType=="Basophil", "Bas",
#                              ifelse(referencePd$CellType=="CD8T_EM", "CD8mem",
#                                     ifelse(referencePd$CellType=="CD8T_naive", "CD8nv", referencePd$CellType)))
referenceBetas<-Ext_deconv_betas3[rownames(Ext_deconv_betas3)%in%rownames(annot[annot$Methyl450_Loci==TRUE,]), colnames(Ext_deconv_betas3)%in%rownames(referencePd)]
identical(colnames(referenceBetas), rownames(referencePd))
## [1] TRUE
try(pheatmap(referenceBetas[rownames(referenceBetas)%in%rownames(coefEsts),],color = colorRampPalette(c("yellow", "black", "blue"), space = "Lab")(128), annotation_colors = ann_colors,
             #      annotation_row = annotation_row, 
             annotation_col = annotation_col, main = "IDOL legacy DMRs 450K", cluster_rows = T, show_rownames = F, labels_col = referencePd$CellType))

cellcountsage<-FlowSorted.Blood.EPIC:::projectCellType(betas_aging[rownames(coefEsts),], coefEsts, lessThanOne = TRUE)

sumage<-rowSums(cellcountsage)
cellcountsage2<-round(cellcountsage*100/sumage,1)
head(cellcountsage2)
##            Bas Bmem  Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono  Neu  NK Treg
## GSM1522958 0.2  1.7 23.2      0  10.7    6.3   0.8 3.4 14.5 23.6 1.3 14.3
## GSM1522959 0.0  0.0 18.3      0  15.2    7.5   9.2 0.0  5.7 22.3 1.6 20.2
## GSM1522960 0.9  0.0 19.1      0   3.1   16.8   8.5 0.0  2.5 28.1 4.3 16.7
## GSM1522961 1.8  3.2 12.7      0  20.5    5.0   8.3 0.0  6.5 22.5 4.3 15.1
## GSM1522962 2.1  0.0  9.9      0   0.0   13.3   0.0 0.0  6.4 49.2 0.0 19.1
## GSM1522963 3.1  1.4 21.4      0  12.5    9.3   5.1 2.7 10.0 16.5 2.7 15.4
pheno5<-cbind.data.frame(cellcountsage2, Age_groups=pheno_aging$Age_groups)
pheno5<-pheno5[order(pheno5$CD4mem),]
pheno5<-pheno5[order(pheno5$CD4nv),]
pheno5<-pheno5[order(pheno5$Neu),]

for (i in dput(levels(as.factor(pheno_aging$Age_groups)))){
    stack<-pheno5[pheno5$Age_groups==i,dput(colnames(coefEsts))]
    stack<-t(stack)
    dat2 <- melt(stack)
    colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bcell = "gold", Bnv = "#1B9E77", Bmem = "magenta", CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", 
NKT = "#7E6E85", NK = "#E6AB02")
    colors2<-colors1[levels(dat2$X1)]
    fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
        geom_bar(stat="identity") +
        xlab("") +#xlab("\nSample") +
        ylab("") +#ylab("Cell %\n") + 
        geom_hline(yintercept=70, linetype="dashed", color = "red")+ 
        #guides(fill=T) +
        theme_bw()+
        ggtitle(paste("Cell projection extended in ", i)) + scale_fill_manual(values=colors2)+
        theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
        theme(legend.title=element_blank(), legend.position = "none") 
    #theme(legend.position="none")
    #detach(pheno2)
    assign(x = paste0(i,"_bar"),value = fetaladultspine)   
}
## c("Newborn", "<12mo", "12-18mo", "18-24mo", "2-5yr", "5-18yr", 
## "18-65yr", ">65yr")
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
paste0(c("Newborn", "<12mo", "12-18mo", "18-24mo", "2-5yr", "5-18yr", 
"18-65yr", ">65yr"), "_bar")
## [1] "Newborn_bar" "<12mo_bar"   "12-18mo_bar" "18-24mo_bar" "2-5yr_bar"  
## [6] "5-18yr_bar"  "18-65yr_bar" ">65yr_bar"
multiplot(  Newborn_bar , `<12mo_bar` ,  `12-18mo_bar` ,`18-24mo_bar`,`2-5yr_bar` ,  `5-18yr_bar` , `18-65yr_bar`, `>65yr_bar`,  cols=3)

boxplot(pheno5[pheno_aging$CellType=="WBC",dput(colnames(coefEsts))], main="All ages")
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")

par(mfrow=c(4,4))
for (i in dput(colnames(coefEsts))){
    boxplot(pheno5[pheno_aging$CellType=="WBC",i]~pheno5$Age_group[pheno_aging$CellType=="WBC"], main= i, xlab="Age group", ylab="Estimated cell %")
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
par(mfrow=c(4,4))

for (i in dput(colnames(coefEsts))){
    plot(pheno5[pheno_aging$CellType=="WBC",i]~pheno_aging$Age[pheno_aging$CellType=="WBC"], main= i, xlab="Age yrs", ylab="Estimated cell %",col="grey")
    fit2<-smooth.spline(pheno5[pheno_aging$CellType=="WBC",i]~pheno_aging$Age[pheno_aging$CellType=="WBC"],cv = TRUE)
#Plotting Regression Line
lines(fit2,lwd=2,col="purple")
}
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful

## Warning in smooth.spline(pheno5[pheno_aging$CellType == "WBC", i] ~
## pheno_aging$Age[pheno_aging$CellType == : cross-validation with non-unique 'x'
## values seems doubtful
#validation
tmp2<-Ext_deconv_betas3[, Ext_deconv_pheno$CellType=="WBC"]
#tmp2<-tmp2[,1:6]


cellcountsval<-FlowSorted.Blood.EPIC:::projectCellType(tmp2[rownames(coefEsts),], as.matrix(coefEsts), lessThanOne = TRUE)
sumage<-rowSums(cellcountsval)
cellcountsval2<-round(cellcountsval*100/sumage,1)
head(cellcountsval2)
##                     Bas Bmem Bnv CD4mem CD4nv CD8mem CD8nv Eos Mono  Neu  NK
## 202163530006_R02C01 1.1  0.0 2.2    0.4  14.4    6.5   1.2 3.8  8.1 55.6 2.4
## 202163530006_R03C01 1.6  0.2 4.9    5.0   9.5   11.3   6.0 2.2  6.5 43.5 7.6
## 202163530006_R04C01 0.1  0.0 5.3    0.3   8.3    7.2   2.0 0.0  7.2 65.0 3.0
## 202163530006_R06C01 0.6  0.1 6.5    8.2  14.8    9.2   6.9 4.5  6.7 33.3 6.0
## 202163530006_R07C01 0.5  0.0 3.5    1.3   4.8    8.1   1.4 0.0  6.9 67.4 2.9
## 202163530006_R08C01 0.5  2.0 4.2    8.6   3.7   20.4   0.0 3.6 11.5 37.3 6.7
##                     Treg
## 202163530006_R02C01  4.2
## 202163530006_R03C01  1.6
## 202163530006_R04C01  1.8
## 202163530006_R06C01  3.1
## 202163530006_R07C01  3.2
## 202163530006_R08C01  1.4
pheno6<-cbind.data.frame(cellcountsval2, CellType=rep("WBC",6))


stack<-pheno6[pheno6$CellType=="WBC",dput(colnames(coefEsts))]
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
    stack<-t(stack)
    dat2 <- melt(stack)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
    #dat2$X1 <- levels(dat2$X1, ref=i)
    #levels(dat2$X1)
    #dat2$X1 <-fct_rev(dat2$X1)
    #levels(dat2$X1)
    colors1<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bcell = "gold", Bnv = "#1B9E77", Bmem = "magenta", CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", CD8T = "darkgray", 
CD8Tnv = "#7570B3", CD8mem = "maroon", 
NKT = "#7E6E85", NK = "#E6AB02")
    colors2<-colors1[levels(dat2$X1)]
    fetaladultspine<- ggplot(dat2, aes(x=X2, y=value, fill=X1)) + 
        geom_bar(stat="identity") +
        xlab("") +#xlab("\nSample") +
        ylab("") +#ylab("Cell %\n") + 
        geom_hline(yintercept=70, linetype="dashed", color = "red")+ 
        #guides(fill=T) +
        theme_bw()+
        ggtitle(paste("Extended projection ", "WBC")) + scale_fill_manual(values=colors2)+
        theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
        theme(legend.title=element_blank(), legend.position = "right") 
    #theme(legend.position="none")
    #detach(pheno2)
    assign(x = paste0("WBC_extended","_bar"),value = fetaladultspine)

#multiplot(  WBC_bar, WBC_FACS_bar, WBC_extended_bar, cols=3)

#Figure 5

cellcpgs<-c("cg19486047",#SPTBN1 Bas"
            "cg01884715", #GRB2 Bmem #"cg17291906" #SWAP70
            "cg06243638", #P4HA2 Bnv
            "cg16008150", #LIMS1 CD4mem
            "cg23788058", #CHD7 CD4nv
            "cg07616471",#CCR5 CD8mem
            "cg05496363",#CD248 CD8nv
            "cg17374802", #EPX Eos
            "cg00771778", #LYST Mono
            "cg05501357", #HIPK3 Neu
            "cg08326410", #KIR2DL4 NK
            "cg22572158")#CTLA4 Treg

betascpgs<-getBeta(FlowSorted.BloodExtended.EPIC)

betascpgs<-betascpgs[rownames(betascpgs)%in%cellcpgs,]
phenoref<-as.data.frame(pData(FlowSorted.BloodExtended.EPIC))
identical(rownames(phenoref), colnames(betascpgs))
## [1] TRUE
par(mfrow=c(1,1))
boxplot(t(betascpgs)~phenoref$CellType)

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta",  
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")
cellTypes2 = names(groups)

titlecpgs<-c("cg19486047"="SPTBN1",#TESC Bas
            "cg01884715"="GRB2",# Bmem
            "cg06243638"= "P4HA2",# Bnv
            "cg16008150"= "LIMS1",#CD4mem #"cg05770620"="TRRAP",# CD4mem
            "cg23788058"="CHD7",# CD4nv
            "cg07616471"="CCR5",# CD8mem
            "cg05496363"="CD248",#CD8nv
            "cg17374802"="EPX",# Eos
            "cg00771778"="LYST",# Mono
            "cg05501357"="HIPK3",# Neu
            "cg08326410"="KIR2DL4",# NK
            "cg22572158"="CTLA4")# Treg
colorcelltype<-groups
colorcelltype<-colorcelltype[cellTypes2]
phenoref$CellType<-as.character(phenoref$CellType)
phenoref$CellType<-factor(phenoref$CellType, levels = cellTypes2)
par(mfrow=c(3,4))
for(i in cellcpgs){
    boxplot(betascpgs[i,]~phenoref$CellType, ylim=c(0,1), xlab="", ylab=expression(beta-value), main=paste(titlecpgs[i],i),
            #cex=2, cex.lab=1.3, cex.axis=1.3, cex.main=1.3, cex.sub=1.3,
            boxfill= colorcelltype, las=2, frame.plot = FALSE)
  box(bty="l")
}

load("I:/Dropbox (Christensen Lab)/Longitudinal data/Longitudinal data.RData")
longData<-updateObject(longData)
longpheno<-as.data.frame(pData(longData))
longsesame<-preprocessNoob(longData)#sesamize(longData)
library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor
## The following object is masked from 'package:genefilter':
## 
##     spec
FACSlong <- read_csv("I:/Dropbox (Christensen Lab)/Longitudinal data/FACSlong.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   `characteristics: Chip` = col_double(),
##   `characteristics: Time point (days)` = col_double(),
##   `characteristics: Slide` = col_double(),
##   Lymph = col_double(),
##   Mono = col_double(),
##   Gran = col_double(),
##   CD4T = col_double(),
##   CD8T = col_double(),
##   nonTcell = col_double()
## )
## i Use `spec()` for the full column specifications.
FACSlong<-as.data.frame(FACSlong)
rownames(FACSlong)<-FACSlong$`Sample name`


tmp3<-getBeta(Ext_deconv_rgset)[,Ext_deconv_pheno$CellType=="WBC"]

stack<-as.data.frame(FACSlong[,c("Lymph",   "Mono",  "Gran",   "CD4T",   "CD8T", "nonTcell")])

cellcountsval<-projectCellType_CP(getBeta(longsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas





cellTypes<-c("Mono",  "Gran",   "CD4T",   "CD8T", "nonTcell" )
cellcolors<-c( nonTcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           #Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A")#,NK="#E6AB02", #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("Mono",  "Gran",   "CD4T",   "CD8T", "nonTcell" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes]*100,1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="nonTcell",c("Bnv","Bmem", "NK")]<-cellcount_EPICIDOL[,c("Bnv","Bmem", "NK")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
        ggtitle("Peripheral adult samples with FCM+CBC data (EPIC IDOL-ext)") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
        #theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval

FlowSorted.Blood.EPIC
## class: RGChannelSet 
## dim: 1051815 49 
## metadata(0):
## assays(2): Green Red
## rownames(1051815): 1600101 1600111 ... 99810990 99810992
## rowData names(0):
## colnames(49): 201868500150_R01C01 201868500150_R03C01 ...
##   201870610111_R06C01 201870610111_R07C01
## colData names(32): Sample_Plate Sample_Well ... filenames normalmix
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
EPICmix<-FlowSorted.Blood.EPIC[,FlowSorted.Blood.EPIC$CellType=="MIX"]
EPICpheno<-as.data.frame(pData(EPICmix))
EPICmixsesame<-preprocessNoob(EPICmix)


stack<-as.data.frame(EPICpheno[,c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu")])
stack$Gran<-stack$Neu

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas





cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran" )
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           #Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono", "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval2<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +
        ylab("Predicted (%)") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
        ggtitle("Adult peripheral blood isolated cells artificial mixtures (EPIC IDOL-ext)") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval2

CB_mixtures

CB_mixpheno<- read.metharray.sheet(base = "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data", pattern = "CB_mixinfo.csv")
## [read.metharray.sheet] Found the following CSV files:
## [1] "I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/CB_mixinfo.csv"
rownames(CB_mixpheno)<-CB_mixpheno$Array_well
CB_mixpheno$CD8nv<-CB_mixpheno$CD8T_naive


cellTypes = c(#"Basophil", "Bmem", 
  "Bnv", #"CD4mem", 
  "CD4nv", "CD4T",#"CD8T_CM", 
  #            "CD8T_EM", 
"CD8nv", #"Eos", #"MIX", 
"Mono", "Neu", "NK")#, #"NKT", 
#"Treg")#,"WBC")

truevaluesCB<-CB_mixpheno[CB_mixpheno$CellType=="CB_Mix", cellTypes]
# library(ExperimentHub)
# hub <- ExperimentHub()
# query(hub, "FlowSorted.Blood.EPIC")
# FlowSorted.Blood.EPIC <- hub[["EH1136"]]
# FlowSorted.Blood.EPIC
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/CordbloodEPICsesame.rda")
CordbloodEPICsesame
## class: GenomicRatioSet 
## dim: 865859 52 
## metadata(1): SNPs
## assays(2): Beta CN
## rownames(865859): cg14817997 cg26928153 ... cg07587934 cg16855331
## rowData names(0):
## colnames(52): 203013220070_R04C01 203013220070_R06C01 ...
##   203808720062_R04C01 203808720062_R05C01
## colData names(34): Sample_Name Sample_Well ... Sex Exclude
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b4.hg19
## Preprocessing
##   Method: SeSAMe (type I)
##   minfi version: 1.34.0
##   Manifest version: 0.6.0
CordbloodEPICset
## class: MethylSet 
## dim: 866091 52 
## metadata(0):
## assays(2): Meth Unmeth
## rownames(866091): cg18478105 cg09835024 ... cg10633746 cg12623625
## rowData names(0):
## colnames(52): 203013220070_R04C01 203013220070_R06C01 ...
##   203808720062_R04C01 203808720062_R05C01
## colData names(33): Sample_Name Sample_Well ... Fetal Sex
## Annotation
##   array: IlluminaHumanMethylationEPIC
##   annotation: ilm10b2.hg19
## Preprocessing
##   Method: NA
##   minfi version: NA
##   Manifest version: NA
EPICmix<-CordbloodEPICset[,CordbloodEPICset$CellType=="CB_Mix"]
EPICpheno<-as.data.frame(CB_mixpheno[CB_mixpheno$CellType=="CB_Mix",])
EPICmixsesame<-EPICmix#sesamize(EPICmix)


stack<-as.data.frame(EPICpheno[,c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Neu")])
stack$Gran<-stack$Neu

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas





cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran" )
cellcolors<-c( Bcell="#1B9E77", #CD4mem="darkblue", Bas= "#E41A1C", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           #Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono", "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c("Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bas", "Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval2a<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
        ggtitle("Cell projection in umbilical cord blood artificial mixtures") + scale_fill_manual(values=groups)+ theme(legend.position = "none")+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval2a

# sheet <- read.metharray.sheet(base = "D:/OneDrive/OneDrive - Dartmouth College/Cord blood letter to the editor/archive", pattern = "WBasmplesheetMay2016.csv")
# #workdir<-"I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data"
# JonesepicData <- read.metharray.exp(targets = sheet,extended = TRUE, force = TRUE)
# Jonessesame<-sesamize(JonesepicData)
# save(JonesepicData, Jonessesame, file= "Raw_data_Jones.RData")

load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/Raw_data_Jones.RData")

stack<-as.data.frame(pData(Jonessesame))[,c("nRBC", "Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK" )]
stack$myeloid_Bcell<-stack$Gran+stack$nRBC+stack$Mono+stack$Bcell
stack$T_NK_lymphoid<-stack$CD8T+stack$CD4T+stack$NK
stack$myeloid<-stack$Gran+stack$nRBC
#stack<-as.data.frame(FACSlong[,c("Lymph",   "Mono",  "Gran",   "CD4T",   "CD8T", "nonTcell")])
Jonessesame<-preprocessNoob(JonesepicData)
cellcountsval<-projectCellType_CP(getBeta(Jonessesame)[IDOLOptimizedCpGsBloodExtended450k,], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv

cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nRBC<-cellcountsval$Bas
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos
cellcountsval$myeloid<-cellcountsval$Gran+cellcountsval$nRBC
cellcountsval$myeloid_Bcell<-cellcountsval$Gran+cellcountsval$nRBC+cellcountsval$Mono+cellcountsval$Bcell
cellcountsval$T_NK_lymphoid<-cellcountsval$CD8T+cellcountsval$CD4T+cellcountsval$NK




cellTypes<-c("nRBC", "Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           #Eos="#3E8E93",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("nRBC", "Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c("nRBC", "Mono", "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Eos","Neu")]<-cellcount_EPICIDOL[,c( "Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK", "nRBC")
plotval3<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("nRBC", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
        #theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval3
## Warning: Removed 1728 rows containing non-finite values (stat_pie).
#Collapsing all the myeloid

cellTypes<-c( "Bcell","CD4T",  "CD8T", "myeloid",  "Mono", "NK" )
cellcolors<-c( #nRBC= "#E41A1C", 
  Bcell="#1B9E77", #CD4mem="darkblue", 
                   CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
           #Eos="#3E8E93",
           myeloid="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
           #Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))

for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("myeloid", "Bcell","CD4T",  "CD8T", "Gran",  "Mono", "NK" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0

for(i in c("Mono", "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="myeloid",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval3a<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
        ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval3a



#Myeloid/lymphoid only


cellTypes<-c("myeloid_Bcell", "T_NK_lymphoid" )
cellcolors<-c( myeloid_Bcell= "black", T_NK_lymphoid="lightgray")
# cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", #CD4mem="darkblue", 
#                    CD4T="#D95F02", CD8T ="#7570B3",#CD8T_mem="maroon", #CD8T_CM="#705C83", 
#            #Eos="#3E8E93",
#            Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")#, #NKT= "#7E6E85", 
#            #Treg="black")
truevalues<-stack#[complete.cases(stack),]
#truevalues<-round(truevalues*100,1)
cellcount_EPICIDOL<-cellcountsval#[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))

for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("myeloid_Bcell", "T_NK_lymphoid" )
cellcolors<-c( nRBC= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8T ="#7570B3", CD8mem="maroon", #CD8T_CM="#705C83", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", #NKT= "#7E6E85", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(stack[,cellTypes])
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(nRBC = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
#Bcell = "gold", 
Bnv = "#1B9E77", Bmem = "magenta", #CD4T = "lightgray", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", #CD8T = "darkgray", 
CD8nv = "#7570B3", CD8mem = "maroon", #CD8T_CM = "#705C83", 
#NKT = "#7E6E85", 
NK = "#E6AB02")#, WBC = "white")

for (i in names(groups))
dat2[,i]<-0


dat2[dat2$X1=="T_NK_lymphoid",c("CD4nv","CD4mem", "Treg", "CD8nv","CD8mem",  "NK")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg", "CD8nv","CD8mem",  "NK")]
dat2[dat2$X1=="myeloid_Bcell",c( "Eos","Neu", "Mono", "Bnv","Bmem","nRBC")]<-cellcount_EPICIDOL[,c( "Eos","Neu", "Mono", "Bnv","Bmem","nRBC")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "nRBC", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval4<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=1), data=dat2, cols=c("nRBC", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 16)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Cell projection extended versus FACS data") + scale_fill_manual(values=groups)+
        #theme(axis.text.x=element_blank())+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval4



# countsextnRBCref2<-projectCellType_CP(getBeta(JonesMsetEx)[IDOLopt_CB_adult_2000$`IDOL Optimized Library`,], IDOLopt_CB_adult_2000$`IDOL Optimized CoefEsts`, lessThanOne = TRUE)
# countsextnRBCref<-round(countsextnRBCref*100,1)
# boxplot(countsextnRBCref)

load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/UCSF_sesame.rda")
load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GBM UCSF/Unfiltered.rda")
#UCSF_sesame
EPICmix<-preprocessNoob(epicData)#UCSF_sesame
colnames(EPICmix)<-colnames(UCSF_betas)
UCSF_pheno$CD8nv<-UCSF_pheno$CD8T_naive
UCSF_pheno$CD8mem<-UCSF_pheno$CD8Tmem
UCSF_pheno$nonTcell<-100-(UCSF_pheno$CD4mem+UCSF_pheno$CD4nv+UCSF_pheno$CD8nv+UCSF_pheno$CD8mem)
EPICpheno<-as.data.frame(UCSF_pheno)
EPICmixsesame<-EPICmix#sesamize(EPICmix)
colnames(EPICmixsesame)<-rownames((UCSF_pheno))

stack<-as.data.frame(EPICpheno[,c("CD8nv", "CD8mem", "CD4mem", "CD4nv", "nonTcell")])


cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended,], FlowSorted.BloodExtended.EPIC.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4mem2<-cellcountsval$CD4mem
cellcountsval$CD4mem<-cellcountsval$CD4mem+cellcountsval$Treg
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$nonTcell<-cellcountsval$Bcell+cellcountsval$NK+cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas

cellTypes<-c("CD8nv", "CD4nv", "CD8mem", "CD4mem", "nonTcell" )
cellcolors<-c( CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3",CD8mem="maroon", nonTcell="#66A61E")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD8nv", "CD4nv", "CD8mem", "CD4mem", "nonTcell" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",  
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL



library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4nv = "#D95F02", CD4mem2 = "darkblue", Treg = "black", 
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "CD4nv", "CD8nv", "CD8mem" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4mem",c("CD4mem2", "Treg")]<-cellcount_EPICIDOL[,c("CD4mem2", "Treg")]
dat2[dat2$X1=="nonTcell",c("Bnv","Bmem", "Bas", "Eos","Neu")]<-cellcount_EPICIDOL[,c("Bnv","Bmem", "Bas", "Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem2", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval5<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem2",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        theme_classic(base_size = 20)+ theme(text = element_text(size=18))+
        ggtitle("Adult peripheral samples from glioma patients with FCM data \n(EPIC IDOL-ext)") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval5

library(FlowSorted.Blood.450k)
FlowSorted.Blood.450k
## class: RGChannelSet 
## dim: 622399 60 
## metadata(0):
## assays(2): Green Red
## rownames(622399): 10600313 10600322 ... 74810490 74810492
## rowData names(0):
## colnames(60): WB_105 WB_218 ... Eos_160 Eos_149
## colData names(8): Sample_Name Slide ... CellType Sex
## Annotation
##   array: IlluminaHumanMethylation450k
##   annotation: ilmn12.hg19
EPICmix<-FlowSorted.Blood.450k[,FlowSorted.Blood.450k$CellType%in%c("WBC", "PBMC", "Gran")]
table(EPICmix$CellType)
## 
## Gran PBMC  WBC 
##    6    6    6
EPICpheno<-read.csv("Reinius.csv", row.names = 2)
EPICpheno<-EPICpheno[colnames(EPICmix),]


identical(colnames(EPICmix), rownames(EPICpheno))
## [1] TRUE
EPICmixsesame<-preprocessNoob(EPICmix)#sesamize(EPICmix)
EPICmixsesame<-EPICmixsesame[,order(colnames(EPICmixsesame))]
EPICpheno<-EPICpheno[order(rownames(EPICpheno)),]
identical(colnames(EPICmixsesame), rownames(EPICpheno))
## [1] TRUE
stack<-as.data.frame(EPICpheno[,c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Neu", "Eos")])
stack$Gran<-stack$Neu+stack$Eos

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77", 
                   CD4T="#D95F02", CD8T ="#7570B3",
               Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon", 
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02",  
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono",  "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval6<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        #guides(fill=T) +
        theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Reinius reported FACS") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval6

#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="WBC",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Neu", "Eos")])
stack$Gran<-stack$Neu+stack$Eos

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="WBC"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77", 
               CD4T="#D95F02", CD8T ="#7570B3",Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))

for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",  
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",  
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono",  "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval6a<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
         theme_classic(base_size = 20)+ #theme(text = element_text(size=rel(2)))+
        ggtitle("Reinius reported FACS") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+#element_text(angle = -90, hjust = 0))+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval6a

#450k artificial mixtures and FACS
# library(minfi)
# library(GEOquery)
# getGEOSuppFiles("GSE77797")
# untar("GSE77797/GSE77797_RAW.tar", exdir = "GSE77797/idat")
# head(list.files("GSE77797/idat", pattern = "idat"))
# 
# idatFiles <- list.files("GSE77797/idat", pattern = "idat.gz$", full = TRUE)
# sapply(idatFiles, gunzip, overwrite = TRUE)
# rgSet <- read.metharray.exp("GSE77797/idat")
# rgSet
# 
# 
# geoMat <- getGEO("GSE77797")
# pD.all <- pData(geoMat[[1]])
# pD <- pD.all#[, c("title", "geo_accession", "characteristics_ch1.1", "characteristics_ch1.2")]
# head(pD)
# 
# 
# #names(pD)[c(3,4)] <- c("group", "sex")
# pD$CD4T <- pD$`cd4+ t cell (%):ch1`
# pD$CD8T <- pD$`cd8+ t cell (%):ch1`
# pD$NK<-pD$`natural killer cell (%):ch1`
# pD$Gran<-pD$`granulocyte (%):ch1`
# pD$Bcell<-pD$`b cell (%):ch1`
# pD$Mono<-pD$`monocyte (%):ch1`
# table(pD$source_name_ch1)
# 
# pD$CellType<-ifelse(pD$source_name_ch1=="Adult human whole blood", "WBC", "MIX")
# 
# #sampleNames(rgSet) <- sub(".*_5", "5", sampleNames(rgSet))
# rownames(pD) <- substr(as.character(pD$description),2,18)
# colnames(rgSet)<- substr(as.character(colnames(rgSet)),12,28)
# pD <- pD[sampleNames(rgSet),]
# identical(rownames(pD), colnames(rgSet))
# pData(rgSet) <- DataFrame(pD, row.names = rownames(pD))
# rgSet
# GSE77797<-rgSet
# 
# library(sesame)
# 
# GSE77797sesame<-sesamize(GSE77797)

load("I:/Dropbox (Christensen Lab)/Extended library/Raw_Data/Raw_Data/GSE77797.rda")

EPICmix<-GSE77797[,GSE77797$CellType%in%c("WBC", "MIX")]
table(EPICmix$CellType)
## 
## MIX WBC 
##  12   6
EPICpheno<-as.data.frame(pData(EPICmix))

identical(colnames(EPICmix), rownames(EPICpheno))
## [1] TRUE
EPICmixsesame<-preprocessNoob(GSE77797)#GSE77797sesame
EPICmixsesame<-EPICmixsesame[,order(colnames(EPICmixsesame))]
EPICpheno<-EPICpheno[order(rownames(EPICpheno)),]

identical(colnames(EPICmixsesame), rownames(EPICpheno))
## [1] TRUE
for (i in c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")){
  EPICpheno[,i]<-as.numeric(EPICpheno[,i])
}

#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="WBC",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")])

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="WBC"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)

cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77", 
                   CD4T="#D95F02", CD8T ="#7570B3",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))
for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",  
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta", 
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black",  
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono",  "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval7<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        theme_classic(base_size = 20)+ 
        ggtitle("Adult peripheral blood with FACS (450k)") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval7

#Artificial mixtures 450k

#Only WBC
stack<-as.data.frame(EPICpheno[EPICpheno$CellType=="MIX",c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran")])

cellcountsval<-projectCellType_CP(getBeta(EPICmixsesame)[IDOLOptimizedCpGsBloodExtended450k,EPICpheno$CellType=="MIX"], FlowSorted.BloodExtended.450klegacy.compTable, lessThanOne = TRUE)
boxplot(cellcountsval)
cellcountsval<-round(cellcountsval*100,2)
cellcountsval<-as.data.frame(cellcountsval)
cellcountsval$CD8T<-cellcountsval$CD8mem+cellcountsval$CD8nv
cellcountsval$CD4T<-cellcountsval$CD4mem+cellcountsval$CD4nv+cellcountsval$Treg
cellcountsval$Bcell<-cellcountsval$Bnv+cellcountsval$Bmem
cellcountsval$Gran<-cellcountsval$Neu+cellcountsval$Eos+cellcountsval$Bas

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bcell="#1B9E77", 
                   CD4T="#D95F02", CD8T ="#7570B3",
           Gran="#66A61E", Mono="#E7298A",NK="#E6AB02")
truevalues<-stack[complete.cases(stack),]
truevalues<-round(truevalues,1)
cellcount_EPICIDOL<-cellcountsval[rownames(truevalues),]

K = length(cellTypes)  # number of cell types

matrixr<-data.frame(Celltypes=cellTypes, R2=rep(NA, length(cellTypes)), RMSE= rep(NA, length(cellTypes)), row.names=cellTypes)

par(mfrow=c(3,3))

for(k in 1:K) {
    pred = as.numeric(cellcount_EPICIDOL[,cellTypes[k]])
    obs = as.numeric(truevalues[,cellTypes[k]])
    R2 = cor(obs, pred, method = "pearson")^2
    RMSE = sqrt(sum((pred - obs)^2/length(pred)))
    matrixr$R2[k]<-round(R2,2)
    matrixr$RMSE[k]<-round(RMSE,2)

    rangexy = range(c(pred,obs))

    par(mar = c(5,5,4,2))
    plot(obs, pred, cex = 2, col = cellcolors[cellTypes[k]], pch = 19, 
        xlim = rangexy, ylim = rangexy, main = "",
        xlab = "Observed (%)", ylab = "Predicted (%)",
        cex.main = 2, cex.lab = 1.5, cex.axis = 1.3)
    title(main = paste(cellTypes[k], "\nRMSE = ", 
          round(RMSE, 2), "; R2 = ", round(R2, 2), sep = ""), 
          cex.main = 2)
    abline(a = 0, b = 1, lwd = 2, lty = "dotted")
}

cellTypes<-c("CD4T", "CD8T", "Mono", "Bcell", "NK", "Gran" )
cellcolors<-c( Bas= "#E41A1C", Bcell="#1B9E77", CD4mem="darkblue", 
                   CD4nv="#D95F02", CD8nv ="#7570B3", CD8mem="maroon",  
           Eos="#3E8E93",
           Neu="#66A61E", Mono="#E7298A",NK="#E6AB02", 
           Treg="black")

truevalues<-truevalues
cellcount_EPICIDOL<-cellcount_EPICIDOL
identical(rownames(truevalues), rownames(cellcount_EPICIDOL))
## [1] TRUE
library(ggplot2)
library(scatterpie)
library(reshape2)
library(gridExtra)


stack2<-t(round(stack[complete.cases(stack),cellTypes],1))
dat2 <- melt(stack2)#, id.vars = "Sample")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat2$X1<-as.character(dat2$X1)
dat2<-dat2[order(as.character(dat2$X2)),]
dat2<-dat2[order(as.character(dat2$X1)),]

stack3<-t(cellcount_EPICIDOL[,colnames(cellcount_EPICIDOL)%in%cellTypes])
dat3 <- melt(stack3)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
dat3$X1<-as.character(dat3$X1)
dat3<-dat3[order(as.character(dat3$X2)),]
dat3<-dat3[order(as.character(dat3$X1)),]

identical(dat2$X2, dat3$X2)
## [1] TRUE
identical(as.character(dat2$X1), as.character(dat3$X1))
## [1] TRUE
dat2$obs<-dat2$value
dat2$pred<-dat3$value

groups<-c(Bas = "#E41A1C", Eos = "#3E8E93", Neu = "#66A61E", Mono = "#E7298A", 
Bnv = "#1B9E77", Bmem = "magenta",  
CD4nv = "#D95F02", CD4mem = "darkblue", Treg = "black", 
CD8nv = "#7570B3", CD8mem = "maroon", NK = "#E6AB02")

for (i in names(groups))
dat2[,i]<-0

for(i in c( "Mono",  "NK" ))
  dat2[dat2$X1==i,i]<-cellcount_EPICIDOL[,i]
dat2[dat2$X1=="CD4T",c("CD4nv","CD4mem", "Treg")]<-cellcount_EPICIDOL[,c("CD4nv","CD4mem", "Treg")]
dat2[dat2$X1=="CD8T",c("CD8nv","CD8mem")]<-cellcount_EPICIDOL[,c("CD8nv","CD8mem")]
dat2[dat2$X1=="Bcell",c("Bnv","Bmem")]<-cellcount_EPICIDOL[,c("Bnv","Bmem")]
dat2[dat2$X1=="Gran",c( "Bas","Eos","Neu")]<-cellcount_EPICIDOL[,c( "Bas","Eos","Neu")]

dput(colnames(dat2))
## c("X1", "X2", "value", "obs", "pred", "Bas", "Eos", "Neu", "Mono", 
## "Bnv", "Bmem", "CD4nv", "CD4mem", "Treg", "CD8nv", "CD8mem", 
## "NK")
plotval7a<-ggplot() + 
  geom_scatterpie(aes(x=obs, y=pred, r=2), data=dat2, cols=c("Bas", 
                                                               "Bnv", "Bmem", 
                                                               "CD4nv", "CD4mem",
                                                               "CD8nv", "CD8mem",
                                                               "Eos", "Mono", "Neu", 
                                                               "NK", "Treg"))+
  xlab("Observed (%)") +#xlab("\nSample") +
        ylab("Predicted (%)") +#ylab("Cell %\n") + 
  geom_smooth(method = "lm", se = FALSE)+
        geom_abline(intercept=0, slope=1, linetype="dashed", color = "black")+ 
        theme_classic(base_size = 20)+ 
        ggtitle("Artificial mixtures isolated blood cells adults (450k)") + scale_fill_manual(values=groups)+
        theme(legend.position = "none")+
  annotation_custom(tableGrob(matrixr, rows=NULL), 
                    xmin=40, xmax=60, ymin=0, ymax=18)
        

plotval7a

library(readxl)
Extended_library_1200_EPIC_fix <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx", 
    sheet = "Extended_library_1200_EPIC_fix")
## New names:
## * `` -> ...1
Extended_library_1500_450k_fix <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx", 
    sheet = "Extended_library_1500_450k_fix")
## New names:
## * `` -> ...1
minfi_pickcompprobes_1200 <- read_excel("Figures tables/Extended_library_1200_EPIC_fix.xlsx", 
    sheet = "minfi_pickcompprobes_1200")
## New names:
## * `` -> ...1
table(Extended_library_1200_EPIC_fix$Cell_deconv, Extended_library_1200_EPIC_fix$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##         
##          Island N_Shelf N_Shore S_Shelf S_Shore <NA>
##   Bas         1       5       4       0       4   91
##   Bmem        3       5      16       2       4   82
##   Bnv         0       2       4       6       7   77
##   CD4mem      8       4      11       3       4   43
##   CD4nv       2       7      11       0      10   49
##   CD8mem     13       3      15       4      13   65
##   CD8nv       3       5      16       2       9   53
##   Eos         5       5       4       5       4   76
##   Mono        0       2       7       6       3  100
##   Neu         1       2       4       6       5   69
##   NK          9       7       5       7       7   68
##   Treg        5       2      14       5       8   93
table(Extended_library_1500_450k_fix$Cell_deconv, Extended_library_1500_450k_fix$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##         
##          Island N_Shelf N_Shore S_Shelf S_Shore <NA>
##   Bas         7      12      15      11       9   76
##   Bmem        8       9      20       4      14   64
##   Bnv         4       7      15      12      14   68
##   CD4mem     19       8      20       8      10   36
##   CD4nv      10      10      15       2       6   57
##   CD8mem     21       7      26       6      20   48
##   CD8nv       7       4      21       4      18   49
##   Eos         7      14      12       6       8   74
##   Mono        2      18      13      17      14  113
##   Neu         2       6      15      11       7   52
##   NK         13       9      17       7      20   61
##   Treg       22      11      29      10      30   79
table(minfi_pickcompprobes_1200$Cell_deconv, minfi_pickcompprobes_1200$Relation_to_UCSC_CpG_Island, useNA = "ifany")
##         
##          Island N_Shelf N_Shore S_Shelf S_Shore <NA>
##   Bas         0       3       5       2       1   86
##   Bmem        3       5       5       5       6   70
##   Bnv         2       3       3       3       8   91
##   CD4mem     20       5      11       3       6   54
##   CD4nv       6       4       7       1       7   75
##   CD8mem      8       4      11       3       9   65
##   CD8nv       5       5       9       4      10   67
##   Eos         8       6       8       8       5   76
##   Mono        3       4       2       6       1   84
##   Neu         0       1       2       0       1   84
##   NK          3       2       5       4       9   77
##   Treg        4       4       9       5       6   73
library(BAGS)
## Loading required package: breastCancerVDX
library(missMethyl)

#read C2 or any of the curated DB gmt file downloaded from http://software.broadinstitute.org/gsea/msigdb/genesets.jsp?collection=C2
load("D:/OneDrive/OneDrive - Dartmouth College/Crossreactive/annotationEPICb5updated2.rda")
MSigDB_C7_CP <- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/c7.all.v7.2.entrez.gmt")

#perform gsameth
dput(levels(as.factor(Extended_library_1200_EPIC_fix$Cell_deconv)))
## c("Bas", "Bmem", "Bnv", "CD4mem", "CD4nv", "CD8mem", "CD8nv", 
## "Eos", "Mono", "Neu", "NK", "Treg")
for (i in cellTypes){
try(gsa.MSigDB <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID[Extended_library_1200_EPIC_fix$Cell_deconv==i], all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_C7_CP,array.type = "EPIC", anno = annotDF))
#  Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
print(i)
try(gsa.MSigDB <- as.data.frame(gsa.MSigDB[order(gsa.MSigDB[,4]),]))
try(print(head(gsa.MSigDB)))
try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR<0.05,])
try(print(head(sigMSigDB)))
#write.csv(gsa.MSigDB, file=paste0(i,"GSEACP7_IDOL_EPIC.csv"))
}
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "CD4T"
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'head': object 'gsa.MSigDB' not found
## Error in try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR < 0.05, ]) : 
##   object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'head': object 'sigMSigDB' not found
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "CD8T"
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'head': object 'gsa.MSigDB' not found
## Error in try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR < 0.05, ]) : 
##   object 'gsa.MSigDB' not found
## Error in h(simpleError(msg, call)) : 
##   error in evaluating the argument 'x' in selecting a method for function 'head': object 'sigMSigDB' not found
## All input CpGs are used for testing.
## [1] "Mono"
##                                                            N DE         P.DE
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP                        186  7 6.420622e-05
## GSE25087_FETAL_VS_ADULT_TREG_DN                          175  7 5.742126e-05
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP    176  6 1.121582e-04
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN           188  7 1.678140e-04
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP        183  6 4.471926e-04
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 188  6 6.193481e-04
##                                                                FDR
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP                        0.1564064
## GSE25087_FETAL_VS_ADULT_TREG_DN                          0.1564064
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP    0.1821449
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN           0.2043975
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP        0.4357445
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 0.5029107
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "Bcell"
##                                                            N DE         P.DE
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP                        186  7 6.420622e-05
## GSE25087_FETAL_VS_ADULT_TREG_DN                          175  7 5.742126e-05
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP    176  6 1.121582e-04
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN           188  7 1.678140e-04
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP        183  6 4.471926e-04
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 188  6 6.193481e-04
##                                                                FDR
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP                        0.1564064
## GSE25087_FETAL_VS_ADULT_TREG_DN                          0.1564064
## GSE21927_SPLEEN_MONOCYTE_VS_GMCSF_GCSF_BONE_MARROW_UP    0.1821449
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_DN           0.2043975
## GSE5589_WT_VS_IL6_KO_LPS_STIM_MACROPHAGE_45MIN_UP        0.4357445
## GSE20198_UNTREATED_VS_IL12_IL18_TREATED_ACT_CD4_TCELL_UP 0.5029107
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "NK"
##                                                            N DE         P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             182  6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192  6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    187  6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           197  6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      193  6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        190  6 0.0004095154
##                                                                FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        0.2372691
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "Gran"
##                                                            N DE         P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             182  6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192  6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    187  6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           197  6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      193  6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        190  6 0.0004095154
##                                                                FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        0.2372691
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
MSigDB_all_CP <- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/msigdb.v7.2.entrez.gmt")
for (i in cellTypes){
try(gsa.MSigDB <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID[Extended_library_1200_EPIC_fix$Cell_deconv==i], all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_all_CP,array.type = "EPIC", anno = annotDF))
#  Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
print(i)
try(gsa.MSigDB <- as.data.frame(gsa.MSigDB[order(gsa.MSigDB[,4]),]))
try(print(head(gsa.MSigDB)))
try(sigMSigDB <- gsa.MSigDB[gsa.MSigDB$FDR<0.05,])
try(print(head(sigMSigDB)))
#write.csv(gsa.MSigDB, file=paste0(i,"GSEAall_IDOL_EPIC.csv"))
}
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "CD4T"
##                                                            N DE         P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             182  6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192  6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    187  6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           197  6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      193  6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        190  6 0.0004095154
##                                                                FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        0.2372691
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "CD8T"
##                                                            N DE         P.DE
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             182  6 0.0001562057
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 192  6 0.0001723678
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    187  6 0.0002433178
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           197  6 0.0001713382
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      193  6 0.0002079336
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        190  6 0.0004095154
##                                                                FDR
## GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP             0.2370888
## GSE20366_EX_VIVO_VS_DEC205_CONVERSION_NAIVE_CD4_TCELL_UP 0.2370888
## GSE18893_TCONV_VS_TREG_24H_CULTURE_DN                    0.2370888
## GSE26343_WT_VS_NFAT5_KO_MACROPHAGE_LPS_STIM_UP           0.2370888
## GSE11961_MARGINAL_ZONE_BCELL_VS_PLASMA_CELL_DAY7_UP      0.2370888
## GSE22886_NAIVE_TCELL_VS_NKCELL_DN                        0.2372691
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "Mono"
##                                        N DE         P.DE       FDR
## ACTGCAG_MIR173P                      102  7 8.903152e-06 0.1384262
## GO_SUPRAMOLECULAR_FIBER_ORGANIZATION 684 16 5.909855e-06 0.1384262
## GO_CELLULAR_COMPONENT_MORPHOGENESIS  768 18 1.355839e-05 0.1405372
## ZHENG_BOUND_BY_FOXP3                 480 12 7.268918e-05 0.1854653
## MIR6730_3P                            80  5 5.070677e-05 0.1854653
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP    186  7 6.420622e-05 0.1854653
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "Bcell"
##                                        N DE         P.DE       FDR
## ACTGCAG_MIR173P                      102  7 8.903152e-06 0.1384262
## GO_SUPRAMOLECULAR_FIBER_ORGANIZATION 684 16 5.909855e-06 0.1384262
## GO_CELLULAR_COMPONENT_MORPHOGENESIS  768 18 1.355839e-05 0.1405372
## ZHENG_BOUND_BY_FOXP3                 480 12 7.268918e-05 0.1854653
## MIR6730_3P                            80  5 5.070677e-05 0.1854653
## GSE22886_NAIVE_CD8_TCELL_VS_DC_UP    186  7 6.420622e-05 0.1854653
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## [1] "NK"
##                                             N DE         P.DE        FDR
## RUTELLA_RESPONSE_TO_HGF_DN                228  9 1.911011e-06 0.05942478
## RUTELLA_RESPONSE_TO_CSF2RB_AND_IL4_DN     308  9 9.307952e-06 0.14472003
## RYAN_MANTLE_CELL_LYMPHOMA_NOTCH_DIRECT_UP 146  7 2.131446e-05 0.20813303
## HAY_BONE_MARROW_NK_CELLS                  347  9 2.677296e-05 0.20813303
## MIR6815_3P                                195  7 6.151610e-05 0.38258091
## JI_RESPONSE_TO_FSH_UP                      63  4 1.615584e-04 0.44997862
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
## All input CpGs are used for testing.
## Error in getMappedEntrezIDs(sig.cpg = sig.cpg, all.cpg = all.cpg, array.type = array.type,  : 
##   There are no genes annotated to the significant CpGs
## [1] "Gran"
##                                             N DE         P.DE        FDR
## RUTELLA_RESPONSE_TO_HGF_DN                228  9 1.911011e-06 0.05942478
## RUTELLA_RESPONSE_TO_CSF2RB_AND_IL4_DN     308  9 9.307952e-06 0.14472003
## RYAN_MANTLE_CELL_LYMPHOMA_NOTCH_DIRECT_UP 146  7 2.131446e-05 0.20813303
## HAY_BONE_MARROW_NK_CELLS                  347  9 2.677296e-05 0.20813303
## MIR6815_3P                                195  7 6.151610e-05 0.38258091
## JI_RESPONSE_TO_FSH_UP                      63  4 1.615584e-04 0.44997862
## [1] N    DE   P.DE FDR 
## <0 rows> (or 0-length row.names)
#perform gsameth for the single-cell database (later known as curated 8)
MSigDB_sc<- ReadGMT("D:/OneDrive/OneDrive - Dartmouth College/GSEA/scsig.all.v1.0.1.entrez.gmt")
gsa.MSigDBsc <- gsameth(sig.cpg=Extended_library_1200_EPIC_fix$IlmnID, all.cpg=rownames(Ext_deconv_betas3), collection=MSigDB_sc)
## All input CpGs are used for testing.
#  Multiple symbols ignored for one or more aliases
#sort by FDR p-value & only keep <0.05
gsa.MSigDBsc <- as.data.frame(gsa.MSigDBsc[order(gsa.MSigDBsc[,4]),])
sigMSigDBsc <- gsa.MSigDBsc[gsa.MSigDBsc$FDR<0.05,]
head(sigMSigDBsc)
##                                                  N DE         P.DE          FDR
## Aizarani_Liver_C1_NK_NKT_cells_1               145 15 1.731999e-07 4.451237e-05
## Gao_Large_Intestine_24W_C11_Paneth_Like_Cell   304 18 1.874468e-05 2.163580e-03
## Hu_Fetal_Retina_Microglia                      355 20 2.525580e-05 2.163580e-03
## Aizarani_Liver_C23_Kupffer_Cells_3             206 13 2.421010e-04 1.244399e-02
## Gao_Large_Intestine_Adult_CI_Mesenchymal_Cells 326 18 2.252025e-04 1.244399e-02
#write.csv(gsa.MSigDBsc, file="GSEAsc_ 150 filtered_BN2.csv")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] missMethyl_1.25.0                                  
##  [2] BAGS_2.31.0                                        
##  [3] breastCancerVDX_1.29.0                             
##  [4] readxl_1.3.1                                       
##  [5] FlowSorted.Blood.450k_1.29.0                       
##  [6] readr_1.4.0                                        
##  [7] tableone_0.12.0                                    
##  [8] ggpubr_0.4.0                                       
##  [9] ggsignif_0.6.1                                     
## [10] IlluminaHumanMethylation450kmanifest_0.4.0         
## [11] FlowSorted.CordBloodCombined.450k_1.7.0            
## [12] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0
## [13] IlluminaHumanMethylationEPICmanifest_0.3.0         
## [14] IDOL_0.0.0.9000                                    
## [15] wateRmelon_1.35.0                                  
## [16] illuminaio_0.33.0                                  
## [17] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0 
## [18] ROC_1.67.0                                         
## [19] lumi_2.43.0                                        
## [20] methylumi_2.37.0                                   
## [21] FDb.InfiniumMethylation.hg19_2.2.0                 
## [22] org.Hs.eg.db_3.13.0                                
## [23] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2            
## [24] GenomicFeatures_1.43.8                             
## [25] AnnotationDbi_1.53.2                               
## [26] scales_1.1.1                                       
## [27] future.apply_1.7.0                                 
## [28] BiocParallel.FutureParam_0.2.0                     
## [29] doFuture_0.12.0                                    
## [30] future_1.21.0                                      
## [31] BiocParallel_1.25.5                                
## [32] limma_3.47.15                                      
## [33] FlowSorted.BloodExtended.EPIC_0.99.0               
## [34] FlowSorted.Blood.EPIC_1.9.0                        
## [35] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
## [36] nlme_3.1-152                                       
## [37] quadprog_1.5-8                                     
## [38] genefilter_1.73.1                                  
## [39] reshape_0.8.8                                      
## [40] gridExtra_2.3                                      
## [41] reshape2_1.4.4                                     
## [42] scatterpie_0.1.6                                   
## [43] ggplot2_3.3.3                                      
## [44] RColorBrewer_1.1-2                                 
## [45] pheatmap_1.0.12                                    
## [46] ENmix_1.27.11                                      
## [47] doParallel_1.0.16                                  
## [48] minfi_1.37.1                                       
## [49] bumphunter_1.33.0                                  
## [50] locfit_1.5-9.4                                     
## [51] iterators_1.0.13                                   
## [52] foreach_1.5.1                                      
## [53] Biostrings_2.59.4                                  
## [54] XVector_0.31.1                                     
## [55] SummarizedExperiment_1.21.3                        
## [56] Biobase_2.51.0                                     
## [57] MatrixGenerics_1.3.1                               
## [58] matrixStats_0.58.0                                 
## [59] GenomicRanges_1.43.4                               
## [60] GenomeInfoDb_1.27.13                               
## [61] IRanges_2.25.11                                    
## [62] S4Vectors_0.29.19                                  
## [63] sesame_1.9.19                                      
## [64] sesameData_1.9.14                                  
## [65] rmarkdown_2.8                                      
## [66] ExperimentHub_1.99.7                               
## [67] AnnotationHub_2.99.6                               
## [68] BiocFileCache_1.99.8                               
## [69] dbplyr_2.1.1                                       
## [70] BiocGenerics_0.37.6                                
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.51.5           
##   [3] tidyr_1.1.3                   bit64_4.0.5                  
##   [5] knitr_1.33                    DelayedArray_0.17.13         
##   [7] data.table_1.14.0             KEGGREST_1.31.2              
##   [9] RCurl_1.98-1.3                GEOquery_2.59.0              
##  [11] generics_0.1.0                preprocessCore_1.53.2        
##  [13] RSQLite_2.2.7                 proxy_0.4-25                 
##  [15] bit_4.0.4                     xml2_1.3.2                   
##  [17] httpuv_1.6.1                  assertthat_0.2.1             
##  [19] xfun_0.23                     hms_1.1.0                    
##  [21] jquerylib_0.1.4               evaluate_0.14                
##  [23] DNAcopy_1.65.0                promises_1.2.0.1             
##  [25] fansi_0.4.2                   restfulr_0.0.13              
##  [27] scrime_1.3.5                  progress_1.2.2               
##  [29] caTools_1.18.2                DBI_1.1.1                    
##  [31] geneplotter_1.69.0            purrr_0.3.4                  
##  [33] ellipsis_0.3.2                dplyr_1.0.6                  
##  [35] backports_1.2.1               survey_4.0                   
##  [37] annotate_1.69.2               biomaRt_2.47.9               
##  [39] sparseMatrixStats_1.3.8       vctrs_0.3.8                  
##  [41] abind_1.4-5                   cachem_1.0.5                 
##  [43] withr_2.4.2                   ggforce_0.3.3                
##  [45] GenomicAlignments_1.27.2      prettyunits_1.1.1            
##  [47] mclust_5.4.7                  cluster_2.1.2                
##  [49] RPMM_1.25                     crayon_1.4.1                 
##  [51] pkgconfig_2.0.3               labeling_0.4.2               
##  [53] tweenr_1.0.2                  rlang_0.4.11                 
##  [55] globals_0.14.0                lifecycle_1.0.0              
##  [57] nleqslv_3.3.2                 MetricsWeighted_0.5.2        
##  [59] filelock_1.0.2                affyio_1.61.1                
##  [61] cellranger_1.1.0              randomForest_4.6-14          
##  [63] polyclip_1.10-0               rngtools_1.5                 
##  [65] base64_2.0                    Matrix_1.3-3                 
##  [67] carData_3.0-4                 zoo_1.8-9                    
##  [69] Rhdf5lib_1.13.7               png_0.1-7                    
##  [71] rjson_0.2.20                  bitops_1.0-7                 
##  [73] KernSmooth_2.23-20            rhdf5filters_1.3.5           
##  [75] blob_1.2.1                    DelayedMatrixStats_1.13.7    
##  [77] doRNG_1.8.2                   stringr_1.4.0                
##  [79] nor1mix_1.3-0                 parallelly_1.25.0            
##  [81] rstatix_0.7.0                 lpSolve_5.6.15               
##  [83] memoise_2.0.0                 magrittr_2.0.1               
##  [85] plyr_1.8.6                    gplots_3.1.1                 
##  [87] zlibbioc_1.37.0               compiler_4.1.0               
##  [89] BiocIO_1.1.2                  cli_2.5.0                    
##  [91] Rsamtools_2.7.2               affy_1.69.1                  
##  [93] listenv_0.8.0                 MASS_7.3-54                  
##  [95] mgcv_1.8-35                   tidyselect_1.1.1             
##  [97] forcats_0.5.1                 stringi_1.6.2                
##  [99] highr_0.9                     mitools_2.4                  
## [101] yaml_2.2.1                    askpass_1.1                  
## [103] sass_0.4.0                    tools_4.1.0                  
## [105] rio_0.5.26                    rstudioapi_0.13              
## [107] foreign_0.8-81                farver_2.1.0                 
## [109] digest_0.6.27                 rvcheck_0.1.8                
## [111] BiocManager_1.30.15           shiny_1.6.0                  
## [113] wheatmap_0.1.0                Rcpp_1.0.6                   
## [115] car_3.0-10                    siggenes_1.65.0              
## [117] broom_0.7.6                   BiocVersion_3.13.1           
## [119] later_1.2.0                   httr_1.4.2                   
## [121] colorspace_2.0-1              XML_3.99-0.6                 
## [123] splines_4.1.0                 statmod_1.4.36               
## [125] multtest_2.47.0               irr_0.84.1                   
## [127] xtable_1.8-4                  jsonlite_1.7.2               
## [129] dynamicTreeCut_1.63-1         R6_2.5.0                     
## [131] pillar_1.6.1                  htmltools_0.5.1.1            
## [133] mime_0.10                     glue_1.4.2                   
## [135] fastmap_1.1.0                 class_7.3-19                 
## [137] interactiveDisplayBase_1.29.0 beanplot_1.2                 
## [139] codetools_0.2-18              utf8_1.2.1                   
## [141] lattice_0.20-44               bslib_0.2.5.1                
## [143] tibble_3.1.2                  BiasedUrn_1.07               
## [145] curl_4.3.1                    gtools_3.8.2                 
## [147] zip_2.1.1                     openxlsx_4.2.3               
## [149] openssl_1.4.4                 survival_3.2-11              
## [151] munsell_0.5.0                 e1071_1.7-6                  
## [153] rhdf5_2.35.5                  GenomeInfoDbData_1.2.6       
## [155] labelled_2.8.0                HDF5Array_1.19.17            
## [157] impute_1.65.0                 haven_2.4.1                  
## [159] gtable_0.3.0